###############################################################################
###############################################################################
## Create parameter list                                                     ##
###############################################################################
###############################################################################

## Set project directory
# projectDir <- gsub("scripts/bulkRNAseq_workflow/analyses/Main_Analysis", "", getwd())
projectDir <- paste0(unlist(strsplit(getwd(), "scripts"))[1])
designDir <- unlist(strsplit(getwd(), "analyses/"))[1]

pipelineList <- biologicSeqTools2::assembleBiologicProject(
  ## Path to the design file. Essential columns: sample.id, sample.group, dataseries. ##
  ## Ideal sample name: [dataseries]_[sample.group]_[replicate]
  designFN =  paste0(designDir, "design/design.table.txt"),
  ## Path to model table ##
  modelFN =  paste0(designDir, "design/model.table.txt"),
  ## Path to NFcore setting file. Set to NULL in no-alignment mode.
  #NFcoreSettingsFN = "/camp/stp/babs/working/boeings/Projects/goulda/adrien.franchet/472A_brains_from_drosophila_larvae_RN21220/workdir/bulkRNAseq_workflow/design/RN21220test.NFcore.samplesheet.file.csv",
  ## Path to relevant FASTQ files.
  #pathToSeqStorageFolder = c(
  #    "/camp/stp/babs/inputs/sequencing/data/goulda/adrien.franchet/RN21220/primary_data/211203_A01366_0104_AH3WVGDMXY/fastq/",
  #    "/camp/stp/babs/inputs/sequencing/data/goulda/adrien.franchet/RN21220/primary_data/211130_A01366_0101_BH3YWKDMXY/fastq/"
  #),
  ## Path to RSEM count table. Essential.
  countTableFN = paste0(projectDir, "data/rsem.count.txt"),
  ## Path to TPM table.
  TpmTableFN =   paste0(projectDir, "data/dfTPM.txt"),
  biologicSettingsFN = paste0(designDir, "design/biologic.settings.file.csv"),
  PcaFN =  NULL,
  #"/camp/stp/babs/working/boeings/Projects/goulda/adrien.franchet/472A_brains_from_drosophila_larvae_RN21220/workdir/data/dfPca.txt",
  calculate_DGE = TRUE,
  calculate_LRT = TRUE,
  ## Path to external DEseq2 output files
  DEseq2External_DGE = NULL,
  #"/camp/stp/babs/working/boeings/Projects/goulda/adrien.franchet/472A_brains_from_drosophila_larvae_RN21220/workdir/data/DEseq2External_DGE/",
  DEseq2External_LRT = NULL,
  #"/camp/stp/babs/working/boeings/Projects/goulda/adrien.franchet/472A_brains_from_drosophila_larvae_RN21220/workdir/data/DEseq2External_LRT/",
  stranded = TRUE,
  read.length = "100bp",
  paired.end = TRUE,
  #pathToRSEMresultsFiles = paste0("/camp/stp/babs/working/boeings/Projects/goulda/adrien.franchet/472_brains_from_drosophila_larvae_RN21220/workdir/", "RSEM/Ensembl/"),
  projectFolder = projectDir,
  experiment_id = "bulkliverdemo",
  project_name = "Liver cancer pseudo bulk RNA-seq demo from Kang et al",
  lims.id = "bulkliverdemo",
  labname = "Park Lab",
  NtopGenes = 500,
  experiment.type = "bulk_rna_seq",
  species = "homo_sapiens",
  release = "release-105",
  count.table.headline = "Expression Intensities for all Samples",
  count.table.sidelabel = "Expr",
  heatmap.headline.text = "Heatmap: Row-averaged Expr",
  designTScol = NULL,
  timecourse.units = NULL,
  primDataDB = "demogtex_data",
  db.user = "babs",
  host = "clvd1-db-u-p-31.thecrick.org",
  lab.categories.table = "demogtex_lab_categories",
  corGeneVec = c("EZH2")
  #corGeneVec = NULL
)
chnkPrefix <- "set.parameters."
VersionPdfExt <- VersionPdfExt <- paste0(".",chnkPrefix,"V", gsub("-", "", Sys.Date()), ".pdf")

Data Preparation

Load Design File

###############################################################################
## Create biologic Metadata object                                           ##

if (file.exists(pipelineList[["biologicSettingsFN"]])){
    dfObio <- read.csv(pipelineList[["biologicSettingsFN"]], header = F, stringsAsFactors = F)
} else {
    stop("biologic settings file not found.")
}



dfObio <- data.frame(t(dfObio), stringsAsFactors = F)
dfObio[is.na(dfObio)] <- ""
colnames(dfObio) <- t(dfObio[1,])
dfObio <- dfObio[-1,]



for (i in 1:ncol(dfObio)){
    pos <- grep("#", dfObio[,i], useBytes = TRUE)
    if (length(pos) > 0){
        dfObio[pos, i] <- ""
    }
}

##                                                                           ##
###############################################################################
###############################################################################
## dbDetailList                                                              ##
dbDetailList <- list(
    "primDataDB" = as.vector(dfObio$primDataDB[1]),
    "ref.cat.db" = as.vector(dfObio$ref.cat.db[1]),
     "db.user" = as.vector(dfObio$db.user[1]),
     "host" = as.vector(dfObio$host[1])
)
## End dbDetailList                                                          ##
###############################################################################

###############################################################################
## Project detail list                                                       ##

fastqFolders <- as.vector(dfObio$fastqFolders)
fastqFolders <- fastqFolders[fastqFolders != ""]
lastChr <- sapply(fastqFolders, function(x) substr(x, nchar(x), nchar(x)))

fastqFolders <- ifelse(lastChr != "/", paste0(fastqFolders, "/"), fastqFolders)

corGeneVec <- as.vector(dfObio$corGeneVec)

if (!(is.null(corGeneVec))){
    corGecorGeneVec <- corGeneVec[corGeneVec != ""]
}



projectDetailList <- list(

    "RSEMcountDataFile" = as.vector(dfObio$RSEMcountDataFile[1]),
    
    "folder" = as.vector(dfObio$folder[1]),
    "primaryAlignmentGeneID" = as.vector(dfObio$primaryAlignmentGeneID[1]),
    "paired.end" =  as.logical(dfObio$paired.end[1]),
    "stranded" = as.logical(dfObio$stranded[1]),
    "labname" = as.vector(dfObio$labname[1]),
    "projectName" = as.vector(dfObio$project_name[1]),
    "read.length" =  as.vector(dfObio$read.length[1]),
    
    
    
    "modelFN" = as.vector(dfObio$modelFN[1]),
    "baseDesignFN" = as.vector(dfObio$baseDesignFN[1]),
    "TpmTableFN" = as.vector(dfObio$TpmTableFN[1]),
    "PcaFN" = as.vector(dfObio$PcaFN[1]),
    
    "DEseq2_DGE_result_folder" = as.vector(dfObio$DEseq2_DGE_result_folder[1]),
    "DEseq2_LRT_result_folder" = as.vector(dfObio$DEseq2_LRT_result_folder[1]),
    
    "DEseq2External_DGE" = as.vector(dfObio$DEseq2External_DGE[1]),
    "DEseq2External_LRT" = as.vector(dfObio$DEseq2External_LRT[1]),
    
    "calculate_DGE" = as.logical(dfObio$calculate_DGE[1]),
    "calculate_LRT" = as.logical(dfObio$calculate_LRT[1]),
    
    "designFN" = as.vector(dfObio$designFN[1]),
    "DGEmodelFN" = as.vector(dfObio$DGEmodelFN[1]),
    "DEseq2betaPrior" = as.logical(dfObio$DEseq2betaPrior[1]),
    "AlignFASTQcolumn" = as.vector(dfObio$AlignFASTQcolumn[1]),
    "NtopGenes" = as.vector(dfObio$NtopGenes[1]),
    "designTScol" = as.vector(dfObio$designTScol[1]),
    "corGeneVec" = corGeneVec,
    "batchMode" = as.logical(dfObio$batchMode[1]),
    "parallelProcessing" = as.logical(dfObio$parallelProcessing[1]),
    "ModuleFASTQC" = as.vector(dfObio$ModuleFASTQC[1]),
    "countTableFN" = as.vector(dfObio$countTableFN[1]),

    
    "ModuleTrimGalore" = as.vector(dfObio$ModuleTrimGalore[1]),
    "TrimGaloreMinLength" = as.vector(dfObio$TrimGaloreMinLength[1]),
    "TrimGaloreMinQuality" = as.vector(dfObio$TrimGaloreMinQuality[1]),

    "lab.categories.table" = as.vector(dfObio$lab.categories.table[1]), # default NULL
    "sra.id.vector" = as.vector(dfObio$sra.id.vector[1]),
    "gse.id.vector" = as.vector(dfObio$gse.id.vector[1]),
    "lims.id" = as.vector(dfObio$lims.id[1]),
    "experiment.type" = as.vector(dfObio$experiment.type[1]),   
    "species" = as.vector(dfObio$species[1]), 
    "release" = as.vector(dfObio$release[1]), 

    "project_id" = as.vector(dfObio$project_id[1]),
    "labname" = as.vector(dfObio$labname[1]),

    "timecourse.units" = as.vector(dfObio$timecourse.units[1]),
    "count.table.headline" = as.vector(dfObio$count.table.headline[1]),
    "count.table.sidelabel" = as.vector(dfObio$count.table.headline[1]),
    "heamap.headline.text" = as.vector(dfObio$count.table.headline[1]),
    "pathToSeqStorageFolder" = fastqFolders,
    "corGeneVec" = dfObio$corGeneVec[dfObio$corGeneVec != ""]
)
## End project detail list                                                   ##
###############################################################################

###############################################################################
## Project Parameters                                                        ##
documentationParams <- list(

    "title" = as.vector(dfObio$title[1]),
    "subtitle" =  as.vector(dfObio$subtitle[1]),
    "abstract" = as.vector(dfObio$abstract[1])

)


## Done Project Params                                                       ##
###############################################################################



###############################################################################
## Reference Table List                                                      ##
dfRefTab <- dfObio[,grep("referenceTableListDB", names(dfObio))]

referenceTableList = list()

if (ncol(dfRefTab) > 0){
    for (i in 1:ncol(dfRefTab)){
        referenceTableList[[as.vector(dfRefTab[1,i])]] <- as.vector(dfRefTab[2,i])
        
    }
## To be added: Check tables against database    
}

    # mysigdb_sc_sig
    # cibersort_L22
    # Allen_Brain_Atlas                            |
    # CORUM                                        |
    # ChEA_2016                                    |
    # DEPOD_phosphatase_substrates                 |
    # ENCODE_TF_ChIP_seq_2015                      |
    # GO_Biological_Process_2017                   |
    #LINCS_L1000_Chem_Pert_down                   |
    #LINCS_L1000_Chem_Pert_down_backup            |
    # LINCS_L1000_Chem_Pert_up                     |
    # LINCS_L1000_Chem_Pert_up_backup              |
    # NCBI_homologene_table                        |
    # Old_CMAP_down                                |
    # Old_CMAP_up                                  |
    # SGP_from_GEO_up_down_combined                |
    # SILAC_Phosphoproteomics                      |
    # TRANSFAC_and_JASPAR_PWMs                     |
    # UK_Biobank_GWAS                              |
    # ag_lab_categories                            |
    # as_lab_categories                            |
    # bader_lab_hESC_reference                     |
    # bt_lab_categories                            |
    # cat_selection_default                        |
    # cs_lab_categories                            |
    # da_lab_categories                            |
    # es_lab_categories                            |
    # esl111_cat_reference_db_table                |
    # et_lab_categories                            |
    # exploration_categories                       |
    # fg_lab_categories                            |
    # fi_lab_categories                            |
    # gk_lab_categories                            |
    # innateDB_PPI                                 |
    # jb_lab_categories                            |
    # js_lab_categories                            |
    # kn_lab_categories                            |
    # mysigdb_c1_positional                        |
    # mysigdb_c2_1329_canonical_pathways           |
    # mysigdb_c2_KEGG                              |
    # mysigdb_c2_REACTOME                          |
    # mysigdb_c2_biocarta                          |
    # mysigdb_c2_chemical_and_genetic_pertubations |
    # mysigdb_c3_TF_targets                        |
    # mysigdb_c3_miRNA_targets 
# et_lab_categories                            |
# | exploration_categories                       |
# | fg_lab_categories                            |
# | fgl391_cat_reference_db_table                |
# | fi_lab_categories                            |
# | gk_lab_categories                            |
# | innateDB_PPI                                 |
# | jb_lab_categories                            |
# | js_lab_categories                            |
# | kn_lab_categories                            |
# | mysigdb_c1_positional                        |
# | mysigdb_c2_1329_canonical_pathways           |
# | mysigdb_c2_KEGG                              |
# | mysigdb_c2_REACTOME                          |
# | mysigdb_c2_biocarta                          |
# | mysigdb_c2_chemical_and_genetic_pertubations |
# | mysigdb_c3_TF_targets                        |
# | mysigdb_c3_miRNA_targets                     |
# | mysigdb_c5_BP                                |
# | mysigdb_c5_CC                                |
# | mysigdb_c5_MF                                |
# | mysigdb_c6_oncogenic_signatures              |
# | mysigdb_c7_immunologic_signatures            |
# | mysigdb_h_hallmarks                          |
# | networkcategories                            |
# | nl_lab_categories                            |
# | pa_lab_categories                            |
# | pb_lab_categories                            |
# | pfam_interpro                                |
# | pp_lab_categories                            |
# | project_db_table                             |
# | project_db_table_backup                      |
# | project_description_table                    |
# | pt_lab_categories                            |
# | re_lab_categories                            |
# | reference_categories_db_new                  |
# | rl_lab_categories                            |
# | sb_lab_categories                            |
# | sc_lab_categories                            |
# | sl_lab_categories                            |
# | sl_lab_categories_backup                     |
# | ss_lab_categories                            |
# | st_lab_categories                            |
# | temp_categories                              |
# | vp_lab_categories                            |
# | vt_lab_categories

## Done                                                                      ##
###############################################################################

# Species has to be "mus_musculus", "homo_sapiens", "danio_rerio" 
# release-86, release-89

## Create defaults ##




Obio = new(
    "bioLOGIC",
    documentationParams = documentationParams,
    dbDetailList = dbDetailList,        
    projectDetailList = projectDetailList,
    referenceTableList = referenceTableList,
    parameterList = projectDetailList
)


# In the future this will be done using the more general
# Obio <- biologicSeqTools2::setGenomeAndGeneNameTable(Obio)
# function

## Temporary fix ##
if (Obio@parameterList$species == "homo_sapiens"){
    Obio@parameterList$primaryAlignmentGeneID <- "ENSG"
    Obio@parameterList$geneIDcolumn <- "hgnc_symbol"
} else if (Obio@parameterList$species == "mus_musculus"){
    Obio@parameterList$primaryAlignmentGeneID <- "ENSMUSG"
    Obio@parameterList$geneIDcolumn <- "mgi_symbol"
} else if (Obio@parameterList$species == "gallus_gallus"){
    Obio@parameterList$primaryAlignmentGeneID <- "ENSGALG"
    Obio@parameterList$geneIDcolumn <- "gg_symbol"
} else {
    hstring <- unlist(strsplit(Obio@parameterList$species, "_"))
    
    hstring2 <- hstring
    hstring2[1] <- substr(hstring2[1], 1,2)
    hstring2[2] <- substr(hstring2[2], 1,1)
    hstring2 <- toupper(paste0(hstring2, collapse = ""))
    primaryAlignmentGeneID <- paste0("ENS", hstring2, "G")
    Obio@parameterList$primaryAlignmentGeneID <- primaryAlignmentGeneID
    
    geneID <- paste0(substr(hstring, 1, 1), collapse = "")
    geneID <- paste0(geneID, "_symbol")
    Obio@parameterList$geneIDcolumn <- geneID
}

## Create analysis folders in the working directory
Obio <- biologicSeqTools2::createAnalysisFolders(
    Obio
)

## Set additional parameters
Obio <- biologicSeqTools2::setDataBaseParameters(Obio)
## Create gene annotation if it is not present ##
## To be activated ##

species <- Obio@parameterList$species

species <- unlist(strsplit(species, "_"))
species[1] <- substr(species[1], 1, 1)
selString <- paste0(species, collapse = "") 
selString <- paste0(selString, "_gene_ensembl")

if (is.null(Obio@parameterList$release)){
    release <- "release-95"
} else {
    release <- Obio@parameterList$release
}

## release95 is no longer available
if (release == "release-95"){
    release <- "release-98"
}

releaseID <- gsub("release-", "", tolower(release))

## Lookup biomart url
lookupTable <- biomaRt::listEnsemblArchives()
lookupTable <- lookupTable[lookupTable$version  == releaseID, ]

if (nrow(lookupTable) > 0){
    biomartURL <- as.vector(lookupTable[1,"url"])
} else {
    stop("No ensembl biomart table available for this release.")
}

primaryAlignmentGeneID <- Obio@parameterList$primaryAlignmentGeneID
geneIDcolumn <- Obio@parameterList$geneIDcolumn

Obio@parameterList$path2GeneIDtable <- paste0(projectDir, "data/project.gene.annotation.txt")

if (is.null(Obio@parameterList$path2GeneIDtable)){
    dfAnno <- biologicSeqTools2::createGeneNameTable(
        obj = Obio,
        biomart = "ENSEMBL_MART_ENSEMBL",
        selString = selString,
        host= biomartURL,
        primaryAlignmentGeneID = primaryAlignmentGeneID,
        geneIDcolumn = geneIDcolumn
    )
    
    dfAnno[dfAnno[,geneIDcolumn] == "", geneIDcolumn] <- dfAnno[dfAnno[,geneIDcolumn] == "", primaryAlignmentGeneID]
    
    write.table(dfAnno, paste0(projectDir, "data/project.gene.annotation.txt"), row.names=F, sep="\t")
    
    Obio@parameterList$path2GeneIDtable <- paste0(projectDir, "data/project.gene.annotation.txt")
    setTable <- TRUE
} else {
    setTable <- FALSE
}

## This can be upgraded to web retrieval of annotation data 
Obio <- biologicSeqTools2::addGeneAnnotation(Obio)

## For now we expect a gene annotation file to be specified already
#Obio <- biologicSeqTools2::setCrickGenomeAndGeneNameTable(Obio)

# In the future this will be done using the more general
# biologicSeqTools2::setGenomeAndGeneNameTable
# function

## Create analysis folders in the working directory
# Obio <- biologicSeqTools2::createAnalysisFolders(
#     Obio
# )
# 
# ## Set additional parameters
# Obio <- biologicSeqTools2::setDataBaseParameters(Obio)
# 
# ## This can be upgraded to web retrieval of annotation data 
# Obio <- biologicSeqTools2::addGeneAnnotation(Obio)

 
## Create shiny path for figure outputs ##           
shinyURL <- paste0(
    "https://",
    Obio@parameterList[["shinyBaseServerURL"]],
    "/shiny/boeings/",
    Obio@parameterList$project_id,
    "_app/"
)            
        
    

## Create outputfolders ##
# if (!dir.exists(paste0(Obio@parameterList$localWorkDir,Obio@parameterList$project_id))){
#     dir.create(paste0(Obio@parameterList$localWorkDir,Obio@parameterList$project_id))
# }

Obio@parameterList[["html_local"]] <- paste0(Obio@parameterList$folder, "html_local/")

if (!dir.exists(Obio@parameterList[["html_local"]])){
    dir.create(Obio@parameterList[["html_local"]])
}

Obio@parameterList[["reportFigDir"]] <- paste0(Obio@parameterList$html_local, "report_figures/")
if (!dir.exists(Obio@parameterList$reportFigDir)){
    dir.create(Obio@parameterList$reportFigDir)
}

pdfTemp <- paste0(Obio@parameterList$reportFigDir, "temp.pdf")

Obio@parameterList[["reportTableDir"]] <- paste0(Obio@parameterList$html_local, "report_tables/")
if (!dir.exists(Obio@parameterList$reportTableDir)){
    dir.create(Obio@parameterList$reportTableDir)
}


## Create data dir
Obio@parameterList[["data_dir"]] <- paste0(Obio@parameterList$folder, "data/")

if (!dir.exists(Obio@parameterList$data_dir)){
    dir.create(Obio@parameterList$data_dir)
}


## Set default for database connections ##
pos <- grep("^host$", names(Obio@dbDetailList))
if (length(pos) ==0 ){
    Obio@dbDetailList$host <- NULL
    
    if (is.null(Obio@dbDetailList)){
        Obio@dbDetailList = list("host" = NULL)
    }
    
    upload.results.to.database <- FALSE
    print("No database server provided. upload.results.to.database set to FALSE")
    
}

if (!is.null(Obio@dbDetailList$host)){
    if (Obio@dbDetailList$host == "10.27.241.234"){
        urlString <- "biologic.thecrick.org"
    } else {
        urlString <- "biologic.crick.ac.uk"
    }    
} else {
    urlString <- ""
}


## Temporary fix functoin
#Obio <- biologicSeqTools2::setCrickGenomeAndGeneNameTable(Obio)

if (setTable){
    Obio@parameterList$path2GeneIDtable <- paste0(projectDir, "data/project.gene.annotation.txt")
}



source("save.biologic.robj.R")

##                                                                           ##
###############################################################################

Option to Load Data Into R

Download variables

With the parmeters below, you can import the data relating to this RNA-Seq projet directly into a R-session.

Here we set the database access variables so we can get your data in

username <- "demogtex_da"
pass <- "KGp2S7cC"
host <- "clvd1-db-u-p-31.thecrick.org"
db <- "demogtex_data"
port <- 6008
Here we define the tables from which we draw the data:
designTB <- "bulkliverdemo_designTable"
mainTB <- "bulkliverdemo_bulk_rna_seq_table"
countTB <- "bulkliverdemo_count_table"
pcaTB <- "bulkliverdemo_PCA"
And here we set a few more variables we’ll need for plotting:
species <- "homo_sapiens"
geneIDcolumn <- "hgnc_symbol"
alignmentGeneID <- "ENSG"

Load Data into R

Next, let’s load your project data. We will need this as a basis for making the plots further down.

## The line below will install an R-package that we need to connect to the  Crick database
devtools::install_github("decusinlabore/bioLOGIC")

library(bioLOGIC)
## Load the design table from database. Here we will retrieve information on samples. 
dfDesign <- import.db.table.from.db(
    dbname = db,
    dbtable = designTB,
    host = host,
    user = username,
    password = pass,
    port = port
)
## Load main data table from database. In this table a lot of gene-level information for this project is assembled. 
dfMainData <- import.db.table.from.db(
    dbname = db,
    dbtable = mainTB,
    host = host,
    user = username,
    password = pass,
    port = port
)
## Load RSEM count data table. 
dfCountData <- import.db.table.from.db(
    dbname = db,
    dbtable = countTB,
    host = host,
    user = username,
    password = pass,
    port = port
)
  
## Load main pca table from database. This table contains cell-level PCA information.
dfPCA <- import.db.table.from.db(
    dbname = db,
    dbtable = pcaTB,
    host = host,
    user = username,
    password = pass,
    port = port
)
## For some plots we want to limit the number of genes to the most interesting, so let's get those in a vector:
# Most variable gene names
dfVar <- dfMainData[dfMainData$logFC_cut_off != 0 ,c(geneIDcolumn, alignmentGeneID)]
mostVarGenes <- as.vector(unique(sort(dfVar[,geneIDcolumn])))
mostVarGenes <- na.omit(mostVarGenes[mostVarGenes != ""])
# Most variable gene IDs
mostVarIDs <- as.vector(unique(sort(dfVar[,alignmentGeneID])))
mostVarIDs <- na.omit(mostVarIDs[mostVarIDs != ""])
chnkPrefix <- "add.data"
VersionPdfExt <- VersionPdfExt <- paste0(".",chnkPrefix,"V", gsub("-", "", Sys.Date()), ".pdf")
## [1] "RSEM file not (correctly) specified"

Alignment Summary

For a summary of the alignment parameters, review the QC tab.

The following tables form the basis for this analysis:

Read count table: 
/nemo/stp/babs/working/boeings/Projects/boeings/stefan.boeing/621_demo_liverdemo_bulkRNAseq/data/rsem.count.txt
TPM table: 
/nemo/stp/babs/working/boeings/Projects/boeings/stefan.boeing/621_demo_liverdemo_bulkRNAseq/data/dfTPM.txt

RNA-Seq Analysis Guide

TECHNICAL CHECKS

Quality Control (QC)

You may want to start by reviewing the quality of the underlying RNA and RNA-sequencing. This is done in the Quality control (QC) section. Things to look for are the percentage of reads that aligned to intergenic regions (indicating DNA contamination in your RNA) and the sequence duplication rate (which might be a PCR artefact introduced during sample amplification and may affect the complexity of the sequenced library).

cat(paste0("A summary of this experiment and its results so far is summarized in an editable [report](https://biologic.crick.ac.uk/",Obio@parameterList$project_id,"/report.php). The purpose of this section is to provide a brief summary of the experiment as well as of its results to a person who is interested in your dataset in the distant future. Ideally, this section contains all information necessary to understand the experiment as well as its results so far. You may download this presentation via the link at the bottom of the page, edit it in whatever way you deem suitable and send it back to bioinformatics for updating. You may also want to add a powerpoint presentation outlining the rationale for this experiment as a [slide show](https://biologic.crick.ac.uk/",Obio@parameterList$project_id,"/about.php)."))

A summary of this experiment and its results so far is summarized in an editable report. The purpose of this section is to provide a brief summary of the experiment as well as of its results to a person who is interested in your dataset in the distant future. Ideally, this section contains all information necessary to understand the experiment as well as its results so far. You may download this presentation via the link at the bottom of the page, edit it in whatever way you deem suitable and send it back to bioinformatics for updating. You may also want to add a powerpoint presentation outlining the rationale for this experiment as a slide show.

Parameter Tables

Display Design File

dfDesign <- Obio@dfDesign


selVec <- c(
    names(dfDesign)[grep('sampleID', names(dfDesign))],
    names(dfDesign)[grep('original.sample.name', names(dfDesign))],
    names(dfDesign)[grep('sample.id', names(dfDesign))],
    names(dfDesign)[grep('^sample.group$', names(dfDesign))],
    names(dfDesign)[grep('^dataseries$', names(dfDesign))],
    names(dfDesign)[grep('^f_', names(dfDesign))],
    names(dfDesign)[grep('comp_', names(dfDesign))],
    names(dfDesign)[grep('LRT_', names(dfDesign))]
    
)


selVec <- selVec[selVec %in% names(Obio@dfDesign)]    


if (length(selVec) > 1){
    dfDesign <- unique(dfDesign[,selVec])
}
    
colnames <- gsub("_", " ", names(dfDesign))
colnames <- gsub("comp", "DGE", colnames)
colnames <- gsub("^f_", "Factor_", colnames)
colnames <- gsub("[.]", " ", colnames)
colnames <- gsub("original.sample.name", "original", colnames)



DT::datatable(
    dfDesign,
    colnames = colnames,
    rownames = FALSE,
    options = list(
        initComplete = htmlwidgets::JS(
            "function(settings, json) {",
            "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
            "}"
        ) #,
    #order = list(list(3, 'asc'), list(2, 'desc'))
    )
) 

Table 1: Design table for this experiment outlining original file names, sample names and group definitions for differential gene expression analysis.

figCap2 = paste0(
    "**Table ",
    Obio@parameterList$tableCount,
    ":** Design table for this experiment outlining original file names, sample names and group definitions for differential gene expression analysis"
)

Display Formula Table

dfModel <- Obio@dfModel
# selVec <- c(
#     names(dfDesign)[grep('sampleID', names(dfDesign))],
#     names(dfDesign)[grep('original.sample.name', names(dfDesign))],
#     names(dfDesign)[grep('sample.id', names(dfDesign))],
#     names(dfDesign)[grep('^f_', names(dfDesign))]
#     names(dfDesign)[grep('comp_', names(dfDesign))],
#     names(dfDesign)[grep('LRT_', names(dfDesign))]
#     
# )
# 
# selVec <- selVec[selVec %in% names(Obio@dfDesign)]
# 
# dfDesign <- unique(dfDesign[,selVec])
# colnames <- gsub("_", " ", names(dfDesign))
# colnames <- gsub("comp", "DGE", colnames)
# colnames <- gsub("[.]", " ", colnames)
# colnames <- gsub("original.sample.name", "original", colnames)


if (nrow(dfModel) > 0){
    DT::datatable(
        dfModel,
        #colnames = colnames,
        rownames = FALSE,
        options = list(
            initComplete = htmlwidgets::JS(
                "function(settings, json) {",
                "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
                "}"
            ),
        order = list(list(3, 'asc'), list(2, 'desc'))
        )
    ) 
}

Table 2: Design table for this experiment outlining original file names, sample names and group definitions for differential gene expression analysis

DESeq2 Analysis

Create DESeq2 object

###############################################################################
## Create and attach DDS object                                              ##

if (is.null( Obio@parameterList$parallelProcessing) || length(Obio@parameterList$parallelProcessing) == 0){
     Obio@parameterList$parallelProcessing <- FALSE
}

Obio <- biologicSeqTools2::createDdsObject(Obio)

## Done creating and attaching DDS objecs                                    ##
###############################################################################

Variation and PCA

###############################################################################
## Determine variation in the data set                                       ##

## This is a tempory fix until the PCA function has been fully equipped for
## batch correction
saveMode <- Obio@parameterList$batchMode
Obio@parameterList$batchMode <- FALSE

Obio <- biologicSeqTools2::addCoVar(
    obj = Obio,
    avgCountCutOffperSample = 1,
    selectionColName = "aboveCutOff",
    dfBaseData = Obio@DESeqNormReadCountsTable,
    rowNameID = Obio@parameterList$primaryAlignmentGeneID
    #options: "DEseq2RV" or "CoVar"
)

## Results are in slot Obio@dataTableList$dfRowVar ##

## Diagnostic plot ##
# library(ggplot2)
# countCutOff <- 0
# pCoVar <- ggplot(
#     data=Obio@dataTableList$dfRowVar[Obio@dataTableList$dfRowVar$avgCountsPerGenePerSample > countCutOff ,],
#     aes(
#         x=CoVar,
#         y=DEseq2RV
#     )) + geom_point(
#     )  + labs(title = "Variation" ,x = "CoVar", y = "DeSeq2Var"
#     ) +  theme(
#         axis.text.y   = element_text(size=8),
#         axis.text.x   = element_text(size=8),
#         axis.title.y  = element_text(size=8),
#         axis.title.x  = element_text(size=8),
#         axis.line = element_line(colour = "black"),
#         panel.border = element_rect(colour = "black", fill=NA, size=1),
#         plot.title = element_text(hjust = 0.5, size = 12)
#     )

## Done determine variation in the data set                                  ##
###############################################################################

###############################################################################
## Perform PCA, MV-analysis, and Clusterdendrogram                           ##

## Select elements for PCA explicityly ##
dfSel <- Obio@dataTableList$dfRowVar

## Remove low count values ##
dfSel <- dfSel[dfSel$avgCountsPerGenePerSample >= 1, ]

## Order by variation
dfSel <- dfSel[order(dfSel$DEseq2RV, decreasing = TRUE),]
# dfSel <- dfSel[order(dfSel$CoVar, decreasing = TRUE),]

rowSelVec <- as.vector(
    dfSel[1:Obio@parameterList$NtopGenes,Obio@parameterList$primaryAlignmentGeneID]
)

Obio@dataTableList[["Ntop4pcaGeneSelection"]] <- rowSelVec



Obio <- doPCA(
    obj = Obio,
    pcaDimensionsToInvestigate = c(1:7),
    Ntop4pca = Obio@parameterList$NtopGenes, #500,
    avgCountCutOffperSample = 0,
    pcaSelectionVec = rowSelVec
)

Obio@parameterList$batchMode <- saveMode
## Do linear fittings to available dimensions ##


# depdendent Variation mode: Variables can be dependent (individual fits)


if (length(unique(Obio@dfDesign$dataseries)) > 1){
    independentDesignColSector <- c(
        "dataseries"
    )
} else {
    independentDesignColSector <- as.vector(NULL, mode = "character")
}

if (length(Obio@dfDesign$sample.group) > 1){
    independentDesignColSector <- c(
        independentDesignColSector,
        "sample.group"
    )
}

if (length(Obio@dfDesign$replicate) > 1){
    independentDesignColSector <- c(
        independentDesignColSector,
        "replicate"
    )
}

pos <- grep("^f_", names(Obio@dfDesign))
if (length(pos) > 0){
  independentDesignColSector <- c(
    names(Obio@dfDesign)[grep("^f_", names(Obio@dfDesign))]
  )

} else if (length(unique(Obio@dfDesign$dataseries)) > 1){
    independentDesignColSector <- c(
      "dataseries"
    )
}


# indedendentVariation mode: Variables are required to be independent
## Do linear variable fittings to PCA ##
# variables need to be independent > culmulative fit #
Obio <- biologicSeqTools2::doLinearFittings(
    obj = Obio,
    designColSelector = independentDesignColSector,
    mode = "independentVariation", ## "independentVariation" or "dependentVariation"
    Ntop4pca = Obio@parameterList$NtopGenes,
    plotname = "P2_independentVariables"
)



## Do linear variable fittings to PCA ##

# individual fit
LRTvec <- names(Obio@dfDesign)[grep("f_", names(Obio@dfDesign))]
#LRTvec <- LRTvec[LRTvec != "LRT_replicate"]
#LRTvec <- "LRT_timepoint"

dependentDesignColSelector<- c(
    independentDesignColSector,
    LRTvec,
    names(Obio@dfDesign)[grep("comp_", names(Obio@dfDesign))],
    names(Obio@dfDesign)[grep("LRT_", names(Obio@dfDesign))]
)
Obio <- biologicSeqTools2::doLinearFittings(
    obj = Obio,
    designColSelector = dependentDesignColSelector,
    mode = "dependentVariation", ## "independentVariation" or "dependentVariation"
    Ntop4pca = Obio@parameterList$NtopGenes,
    plotname = "P2_dependentVariables"
)


## Create Plot dendrogram ##
# Obio <- createSampleDendrogram(
#     obj = Obio,
#     Ntop4pca = Obio@parameterList$NtopGenes,
#     plotname = "dendrogram10000"
# )

###############################################################################
## Do differential gene expression analysis                                  ##
#Obio@parameterList$batchMode <- F
## LRT tests for multiple sample groups ##

# if (!is.null(Obio@projectDetailList$DEseq2_DGE_result_folder) && Obio@projectDetailList$DEseq2_DGE_result_folder != ""){
#     mode = "load_DGE_from_file"
#     Obio <- loadDESeq2outputFromFile(
#         Obio,
#         replace = TRUE
#     )
# } else {

    
#}


############################################################
## Function load DGE results from file
loadDESeq2outputFromFile <- function(
    Obio,
    DEseq2resultDir = "Obio@parameterList$DEseq2External_DGE",
    replace = FALSE,
    mode = NULL # Can be DGE or LRT
){
    
    if (is.null(mode)){
        if (DEseq2resultDir == "DEseq2External_LRT"){
            mode = "LRT"
        } else {
            mode = "DGE"
        }
    }
  
    allfiles <- paste0(DEseq2resultDir, "/", list.files(DEseq2resultDir))
    #allfiles <- allfiles[grep(".txt", allfiles)]
    allfiles <- gsub("//", "/", allfiles)
    allfiles <- sort(allfiles)
    contrastNames <- gsub(gsub("//", "/", paste0(DEseq2resultDir, "/")), "", allfiles)
    contrastNames <- gsub(".txt", "", contrastNames)

    ###############################################################################
    ## For this project only: Add ENSMUSG column                                 ##
    dfAnno <- Obio@dfGeneAnnotation
    dfAnno <- unique(
      Obio@dfGeneAnnotation[,c(Obio@parameterList$primaryAlignmentGeneID, Obio@parameterList$geneIDcolumn)]
    )
    
    ## Done adding ENSMUSG column                                                ##
    ###############################################################################
    for (i in 1:length(allfiles)){
      colName <- contrastNames[i]
      res <- read.delim(allfiles[i], header = T, sep="\t")
      
      
      ## In case gene_ids were saved in row names
      pos <- grep("gene_id", names(res))
      if (length(pos) == 0){
          res[["gene_id"]] <- row.names(res)  
      } else if (length(pos) == 1){
          row.names(res) <- res$gene_id
      }
      
      ################################
      ## This project only 
      
      
      selVec <- c("gene_id", "baseMean", "log2FoldChange", "lfcSE",  "pvalue", "padj")
      
      if (!sum(selVec %in% names(res)) == length(selVec)){
          stop(paste0("Check DESeq2 input files. Mandantory columns: ", paste0(selVec, collapse = ", ")))
      }
      
      res <- unique(res[,selVec])
      
      res <- res[!(duplicated(res[,"gene_id"])),]
      
      ## Remove all log-fcs with an NA padj
      res <- res[!is.na(res$padj),]
      res <- res[!is.na(res$log2FoldChange),]
      
      
      res[is.na(res)] <- 0
      res <- res[res$baseMean > 0,]
      
      # Plus one to avoid negative log2 baseMeans
      res$baseMean <- log2((res$baseMean + 1))
      ## Done 
      ################################
      
      
      #row.names(res) <- res[,Obio@parameterList$primaryAlignmentGeneID]
      
      names(res) = paste(names(res), colName, sep="_")
      names(res) <- gsub(paste0("gene_id_", colName), "gene_id", names(res))
      names(res) <- gsub("^gene_id$", Obio@parameterList$primaryAlignmentGeneID, names(res))
      #res[[Obio@parameterList$primaryAlignmentGeneID]] = rownames(res)
      
      ## log2 the base mean for lrt applications
      
      
      if (mode == "DGE"){
          names(res) = gsub("log2FoldChange", "logFC", names(res))
          names(res) = gsub(
            "logFC",
            paste("contrast_D", i, "_logFC", sep=""),
            names(res)
          )
          
          names(res) = gsub(
            "padj",
            paste("contrast_D", i, "_padj", sep=""),
            names(res)
          )
          
          names(res) = gsub(
            "stat",
            paste("contrast_D", i, "_stat", sep=""),
            names(res)
          )
          
          names(res) = gsub(
            "baseMean",
            paste("contrast_D", i, "_lg2BaseMean", sep=""),
            names(res)
          )
          
          #Remove all rows without a padj
          padj.col = grep("padj", names(res))[1]
          res[,padj.col][is.na(res[,padj.col])] = ""
          res = res[res[,padj.col] != "", ]
          res[,padj.col] <- as.numeric(res[,padj.col])
          
          ## Add log10p column ##
          padj  <- names(res)[grep("_padj_", names(res))]
          lg10p <- gsub("padj", "lg10p", padj)
          
          for (z in 1:length(padj)){
            preprocess <- as.numeric(res[,padj[z]])
            minNum <- min(preprocess[preprocess != 0])
            preprocess[preprocess == 0] <- minNum
            
            # if (length(grep("padj_LRT", padj[i])) > 0){
            #     preprocess <- as.numeric(res[,padj[z]])
            #     minNum <- min(preprocess[preprocess != 0])
            #     preprocess[preprocess == 0] <- minNum
            # } else {
            #     preprocess <- as.numeric(res[,padj[z]])
            # }
            
            temp <- -1*log10(preprocess)
            #temp[temp >= 50] = 50
            res[,lg10p[z]] <- temp
          }
          
          col.vector = c(
            Obio@parameterList$primaryAlignmentGeneID,
            names(res)[grep("contrast", names(res))]
          )
          
          res = res[,col.vector]
          
          ## Make all numeric columns numeric ##
          res[,grep("contrast_", names(res))] <- apply(res[,grep("contrast_", names(res))], 2, as.numeric)
      }
      
      if (mode == "LRT"){
               #res[[Obio@parameterList$primaryAlignmentGeneID]] = rownames(res)

                res$stat <- NULL

                
                names(res) = gsub(
                    "baseMean",
                    paste0("contrast_L_lg2BaseMean_"),
                    names(res)
                )

                names(res) = gsub(
                    "padj",
                    paste0("contrast_L_padj_"),
                    names(res)
                )

                #Remove all rows without a padj
                padj.col = grep("padj", names(res))[1]
                res[,padj.col][is.na(res[,padj.col])] = ""
                res = res[res[,padj.col] != "", ]
                res[,padj.col] <- as.numeric(res[,padj.col])

                ## Add log10p column ##
                padj  <- names(res)[grep("_padj_", names(res))]
                lg10p <- gsub("padj", "lg10p", padj)

                for (z in 1:length(padj)){
                    preprocess <- as.numeric(res[,padj[z]])
                    minNum <- min(preprocess[preprocess != 0])
                    preprocess[preprocess == 0] <- minNum

                    # if (length(grep("padj_LRT", padj[i])) > 0){
                    #     preprocess <- as.numeric(res[,padj[z]])
                    #     minNum <- min(preprocess[preprocess != 0])
                    #     preprocess[preprocess == 0] <- minNum
                    # } else {
                    #     preprocess <- as.numeric(res[,padj[z]])
                    # }

                    temp <- -1*log10(preprocess)
                    #temp[temp >= 50] = 50
                    res[,lg10p[z]] <- temp
                }

                col.vector = c(
                    Obio@parameterList$primaryAlignmentGeneID,
                    names(res)[grep("contrast", names(res))]
                )

                res = res[,col.vector]

                ## Make all numeric columns numierc ##
                ## Make all numeric columns numierc ##
                res[,grep("contrast_", names(res))] <- apply(res[,grep("contrast_", names(res))], 2, as.numeric)
      } # end LRT mode
      
      if (i == 1){
        dfContrastTable <- res
      } else {
        dfContrastTable <- merge(
          dfContrastTable,
          res,
          by.x = Obio@parameterList$primaryAlignmentGeneID,
          by.y = Obio@parameterList$primaryAlignmentGeneID,
          all = TRUE
        )
        dfContrastTable[is.na(dfContrastTable)] <- 0
      }
    }
    
    
    # head(dfContrastTable)
    
    dfContrastTable<- dfContrastTable[rowSums(dfContrastTable[,2:ncol(dfContrastTable)]) != 0, ]
    names(dfContrastTable) <- gsub("__", "_", names(dfContrastTable))
    
    if (mode == "LRT"){
        if (replace){
            Obio@DEseq2LRTtable <- data.frame(NULL)
            Obio@DEseq2LRTtable <- dfContrastTable
        } else if (nrow(Obio@DEseq2LRTtable) > 0){
            Obio@DEseq2LRTtable <- merge(
                Obio@DEseq2LRTtable,
                dfContrastTable,
                by.x = Obio@parameterList$primaryAlignmentGeneID,
                by.y = Obio@parameterList$primaryAlignmentGeneID,
                all = TRUE
            )
            Obio@DEseq2LRTtable[is.na(Obio@DEseq2LRTtable)] <- 0 
        } else {
            Obio@DEseq2LRTtable <- dfContrastTable
        }
    }
    
    if (mode == "DGE"){
        if (replace){
            Obio@DEseq2contrastTable <- data.frame(NULL)
            Obio@DEseq2contrastTable <- dfContrastTable
        } else if (nrow(Obio@DEseq2contrastTable) > 0){
            Obio@DEseq2contrastTable <- merge(
                Obio@DEseq2contrastTable,
                dfContrastTable,
                by.x = Obio@parameterList$primaryAlignmentGeneID,
                by.y = Obio@parameterList$primaryAlignmentGeneID,
                all = TRUE
            )
            Obio@DEseq2contrastTable[is.na(Obio@DEseq2contrastTable)] <- 0 
        } else {
            Obio@DEseq2contrastTable <- dfContrastTable
        }
    }
    
    
    
    
    if (mode == "LRT"){
      print("DESeq2 results loaded into Obio@DEseq2LRTtable")
    } else {
      print("DESeq2 results loaded into Obio@DEseq2contrastTable")
    }
    
    return(Obio)
}

## Done with function                                                        ##
###############################################################################

    
###############################################################################
## Add or calculate LRT results                                              ##
if (!is.null(Obio@parameterList$calculate_DGE) && is.logical(Obio@parameterList$calculate_DGE)) {
    calculate_LRT <- Obio@parameterList$calculate_LRT
} else {
    calculate_LRT <- TRUE 
}




pos <- grep("LRT_", names(Obio@dfDesign))
if (length(pos) == 0){
    calculate_LRT <- FALSE
}

if (calculate_LRT){
  dfDGE <- Obio@dfModel
    dfDGE <- dfDGE[dfDGE$test == "LRT",]
  
    if (nrow(dfDGE) > 0){
        Obio <- biologicSeqTools2::LRTanalysis(
            obj = Obio,
            createNewResultTable = TRUE
        )
    }
} 

## Add additional, external LRT files, if available
lrtFilesToAdd <- NULL
if (!is.null(Obio@parameterList$DEseq2External_LRT)){
    lrtFilesToAdd <- list.files(Obio@parameterList$DEseq2External_LRT)
}    
    
if (length(lrtFilesToAdd) > 0){
    Obio <- loadDESeq2outputFromFile(
        Obio,
        replace = TRUE,
        mode = "LRT",
        DEseq2resultDir = Obio@parameterList$DEseq2External_LRT
    )
} 


## Done adding and/or calculating LRT results                                 ##
################################################################################
  

################################################################################
## Add calculate DGE results                                                  ##

if (!is.null(Obio@parameterList$calculate_DGE) && is.logical(Obio@parameterList$calculate_DGE)) {
    calculate_DGE <- Obio@parameterList$calculate_DGE
} else {
    calculate_DGE <- TRUE  
}


pos <- grep("comp_", names(Obio@dfDesign))
if (length(pos) == 0){
    calculate_DGE <- FALSE
}

if (calculate_DGE){
  ## Pairwise differential gene expression ##
    ## If working on projects prior to fall 2018, make sure
    ## Obio@parameterList$DEseq2betaPrior is set to true.
    dfDGE <- Obio@dfModel
    dfDGE <- dfDGE[dfDGE$test == "Wald",]

    if (nrow(dfDGE) > 0){
        Obio <- biologicSeqTools2::DGEanalysis(
            obj = Obio,
            createNewResultTable = TRUE
        )
    }
  
}    

## If mode is not calculate DGE, a dge table needs to be provided
dgeFilesToAdd <- NULL
if (!is.null(Obio@parameterList$DEseq2External_DGE)){
    dgeFilesToAdd <- list.files(Obio@parameterList$DEseq2External_DGE)
}    
    
if (length(dgeFilesToAdd) > 0){
    Obio <- loadDESeq2outputFromFile(
        Obio,
        replace = FALSE,
        mode = "DGE",
        DEseq2resultDir = Obio@parameterList$DEseq2External_DGE
    )
}     

## Done adding and/or calculating LRT results                                 ##
################################################################################



## Done differential gene expresison analysis                                ##
###############################################################################

DEseq2 Differential Gene Expression Analysis

###############################################################################
## Create dfSummary                                                          ##



dfDGE <- Obio@DEseq2contrastTable
## Temporary Fix ##
## Remove empty pase means
rmVec <- grep("lg2BaseMean$", names(dfDGE))
if (length(rmVec) > 0){
    dfDGE <- dfDGE[,-rmVec]
}
## lg2 base mean ##
lg2Vec <- grep("lg2BaseMean", names(dfDGE))
if (length(lg2Vec ) > 0){
    for (i in 1:length(lg2Vec ))
    dfDGE[,lg2Vec[i]] <- log2(dfDGE[,lg2Vec[i]])
}

dfLRT <- Obio@DEseq2LRTtable

dfPCA <- Obio@dfPCAgenes


df.summary <- dfDGE

###############################################################################
## Calculate correlations                                                    ##
## Adding annotation ##
dfAnno <- unique(Obio@dfGeneAnnotation[,c(Obio@parameterList$primaryAlignmentGeneID, Obio@parameterList$geneIDcolumn)])
dfAnno <- dfAnno[dfAnno[,Obio@parameterList$primaryAlignmentGeneID] %in% df.summary[,Obio@parameterList$primaryAlignmentGeneID],]


pos <- grep("corGeneVec", names(Obio@parameterList))

if (length(pos) == 0){
    Obio@parameterList[["corGeneVec"]] <- NULL
}

if (!is.null(Obio@parameterList$corGeneVec)){
    hVec <- Obio@parameterList$corGeneVec
    dfAnnoCor <- dfAnno[dfAnno[,Obio@parameterList$geneIDcolumn] %in% hVec, ]

    Obio@parameterList$corGeneVec <- as.vector(dfAnnoCor[,Obio@parameterList$geneIDcolumn])
}

if (exists("dfAnnoCor")){
    if (nrow(dfAnnoCor) > 0){
        dfTPM <- Obio@dfTPM
        names(dfTPM) <- gsub("^X$", Obio@parameterList$primaryAlignmentGeneID, names(dfTPM))
        names(dfTPM) <- gsub("^gene_id$", Obio@parameterList$primaryAlignmentGeneID, names(dfTPM))

        names(dfTPM) <- paste0("norm_counts_", names(dfTPM ))
        names(dfTPM) <- gsub(
        paste0(
          "norm_counts_", Obio@parameterList$primaryAlignmentGeneID),
          Obio@parameterList$primaryAlignmentGeneID,
          names(dfTPM)
        )
        
        row.names(dfTPM) <- dfTPM[,Obio@parameterList$primaryAlignmentGeneID]
        dfTPM[,Obio@parameterList$primaryAlignmentGeneID] <- NULL
        for (k in 1:nrow(dfAnnoCor)){
            ###############################################################################
            ## do correlation analysis                                                   ##
    
            pValueCor = rep(1, nrow(dfTPM))
            corCoef = rep(0, nrow(dfTPM))
            cor.method = "pearson"
    
            geneSel <- as.vector(dfAnnoCor[k, Obio@parameterList$primaryAlignmentGeneID])
            pattern <- as.numeric(dfTPM[geneSel, ])
    
            #Find best correlation with kinase expression
            print("Starting to calculate correlations...")
            for (i in 1:nrow(dfTPM)){
                samplePattern <- as.numeric(t(dfTPM[i,]))
    
                if (sum(samplePattern) != 0){
                    cor.test.result = cor.test(samplePattern, pattern, method=cor.method)
                    pValueCor[i] = cor.test.result$p.value
                    corCoef[i] = cor.test.result$estimate
                }
                if (i%%1000 == 0){
                    print(i)
                }
            }
            print("...done.")
    
            dfTPM[["pValueCor"]] <- pValueCor
            dfTPM[["corCoef"]] <- corCoef
    
            dfTPM <- dfTPM[order(dfTPM$corCoef, decreasing = TRUE),]
            dfTempRes <- dfTPM
            dfTempRes[[Obio@parameterList$primaryAlignmentGeneID]] <- row.names(dfTempRes)
            dfTempRes <- dfTempRes[,c("corCoef", Obio@parameterList$primaryAlignmentGeneID)]
            names(dfTempRes) <- gsub("corCoef", paste0("corCoef_", as.vector(dfAnnoCor[k, Obio@parameterList$geneIDcolumn])), names(dfTempRes))
    
            if (k==1){
                dfTRes <- dfTempRes
            } else {
                dfTRes <- merge(
                    dfTRes,
                    dfTempRes,
                    by.x = Obio@parameterList$primaryAlignmentGeneID,
                    by.y = Obio@parameterList$primaryAlignmentGeneID,
                    all =TRUE
                )
                dfTRes[is.na(dfTRes)] <- 0
            }
    
    
            ## Done correlation analysis                                                 ##
            ###############################################################################
        }
    
    
        Obio@dataTableList[["geneCorTables"]] <- dfTRes
    }
}

## Done calculate correlations                                               ##
###############################################################################




###############################################################################
## Adding annotation to df.summary                                           ##
dfNormCounts <- Obio@DESeqNormReadCountsTable
names(dfNormCounts) <- paste0("DEseq2NormalizedReadCounts_", names(dfNormCounts))
dfNormCounts[[Obio@parameterList$primaryAlignmentGeneID]] <- row.names(dfNormCounts)

dfTPM <- Obio@dfTPM

## In case other names are used as col names
names(dfTPM) <- gsub("^X$", Obio@parameterList$primaryAlignmentGeneID, names(dfTPM))
names(dfTPM) <- gsub("^gene_id$", Obio@parameterList$primaryAlignmentGeneID, names(dfTPM))

names(dfTPM) <- paste0("norm_counts_", names(dfTPM ))
names(dfTPM) <- gsub(
    paste0("norm_counts_", Obio@parameterList$primaryAlignmentGeneID),
    Obio@parameterList$primaryAlignmentGeneID,
    names(dfTPM)
)

## Done adding annotation                                                    ##
###############################################################################

if (nrow(dfLRT) > 0){
  df.summary <- merge(
      df.summary,
      dfLRT,
      by.x = Obio@parameterList$primaryAlignmentGeneID,
      by.y = Obio@parameterList$primaryAlignmentGeneID,
      all = TRUE
  )
  df.summary[is.na(df.summary)] <- 0
}

df.summary <- merge(
    df.summary,
    dfTPM,
    by.x = Obio@parameterList$primaryAlignmentGeneID,
    by.y = Obio@parameterList$primaryAlignmentGeneID,
    all = TRUE
)
df.summary[is.na(df.summary)] <- 0


df.summary <- merge(
    df.summary,
    dfNormCounts,
    by.x = Obio@parameterList$primaryAlignmentGeneID,
    by.y = Obio@parameterList$primaryAlignmentGeneID,
    all = TRUE
)
df.summary[is.na(df.summary)] <- 0

df.summary <- merge(
    df.summary,
    dfPCA,
    by.x = Obio@parameterList$primaryAlignmentGeneID,
    by.y = Obio@parameterList$primaryAlignmentGeneID,
    all = TRUE
)
df.summary[is.na(df.summary)] <- 0

###############################################################################
## Add correlation bits, if they exist                                       ##

## Check gene level correlations ##
if (length(grep("geneCorTables", names(Obio@dataTableList))) > 0){
    dfAdd <- Obio@dataTableList$geneCorTables
    names(dfAdd)[grep("corCoef_", names(dfAdd))] <- paste0("add_venn_X_", names(dfAdd)[grep("corCoef_", names(dfAdd))])

    df.summary <- merge(
      df.summary,
      dfAdd,
      by.x = Obio@parameterList$primaryAlignmentGeneID,
      by.y = Obio@parameterList$primaryAlignmentGeneID,
      all = TRUE
    )
    df.summary[is.na(df.summary)] <- 0
}

## Check ts correlations.
if (length(grep("tsCorTables", names(Obio@dataTableList))) > 0){
    dfAdd <- Obio@dataTableList$tsCorTables

    df.summary <- merge(
      df.summary,
      dfAdd,
      by.x = Obio@parameterList$primaryAlignmentGeneID,
      by.y = Obio@parameterList$primaryAlignmentGeneID,
      all = TRUE
    )
    df.summary[is.na(df.summary)] <- 0
}

## Done with correlations                                                    ##
###############################################################################



## Adding annotation ##
dfAnno <- Obio@dfGeneAnnotation
dfAnno <- dfAnno[dfAnno[,Obio@parameterList$primaryAlignmentGeneID] %in% df.summary[,Obio@parameterList$primaryAlignmentGeneID],]

df.summary <- merge(
    df.summary,
    dfAnno,
    by.x = Obio@parameterList$primaryAlignmentGeneID,
    by.y = Obio@parameterList$primaryAlignmentGeneID,
    all = TRUE
)
df.summary[is.na(df.summary)] <- 0

df.summary[df.summary[,Obio@parameterList$geneIDcolumn] == 0, Obio@parameterList$geneIDcolumn] <- df.summary[df.summary[,Obio@parameterList$geneIDcolumn] == 0, Obio@parameterList$primaryAlignmentGeneID]

## Done creating dfSummary                                                   ##
###############################################################################

###############################################################################
## Add Variation measures                                                    ##

if (!is.null(Obio@dataTableList$dfRowVar)){
    dfVarMeasures <- Obio@dataTableList$dfRowVar

    df.summary <- merge(
        df.summary,
        dfVarMeasures,
        by.x = Obio@parameterList$primaryAlignmentGeneID,
        by.y = Obio@parameterList$primaryAlignmentGeneID,
        all = TRUE
    )
    df.summary[is.na(df.summary)] <- 0

}


## Done with variations                                                      ##
###############################################################################


###############################################################################
# Upload to website                                                           #
###############################################################################
#library(SBwebtools)


###############################################################################
# Prepare database table                                                      #
###############################################################################

###############################################################################
## Default Heatmap option A Ntop most variable genes                         ##

###############################################################################
## Option A Select most variable genes                                       ##

df.summary[["logFC_cut_off"]] <- 0

rowSelVec <- as.vector(
    dfSel[1:Obio@parameterList$NtopGenes,Obio@parameterList$primaryAlignmentGeneID]
)
df.summary[df.summary[, Obio@parameterList$primaryAlignmentGeneID] %in% rowSelVec, "logFC_cut_off"] <- 1


## Select for heatmap ##
# df.summary <- selectHeatmapGenes(
#     dfData = df.summary,
#     cutOff = 1.3,
#     zeroOneCol = "logFC_cut_off",
#     selCol = "contrast_1_lg10p",
#     geneID = Obio@parameterList$geneIDcolumn
# )

## Select for heatmap all genes with a TPM row sum of 2 or higher ##
# df.summary[["logFC_cut_off"]] <- 0
# df.summary[,"logFC_cut_off"] <- rowSums(df.summary[,grep("norm_counts_", names(df.summary))])
# nSamples <- length(unique(dfDesign$sample.id))
# df.summary[,"logFC_cut_off"] <- ifelse(df.summary$logFC_cut_off >= 5*nSamples, 1, 0)

## Select for heatmap: abs change of at least 0.5 in any contrast ##

###############################################################################
## Create main database table                                                ##

Obio@databaseTable <- biologicSeqTools2::datatable.to.website.ptm(
    df.data = df.summary,
    gene.id.column = Obio@parameterList$primaryAlignmentGeneID,
    heatmap.genes = "",
    n.cluster.genes = 2000,
    count.data = TRUE,
    logFC.cut.off = 1,
    #use.logFC.columns.for.heatmap = FALSE,
    selector4heatmap.cols = "norm_counts",
    heatmap.preprocessing = "lg2.row.avg", # possible: "lg2", "lg2.row.avg", "none"
    hm.cut.off = 4,
    n.hm.cluster = 10,
    count.cut.off.filter = 0
)


## Done creating main database table                                         ##
###############################################################################


###############################################################################
## Create Excel output files                                                 ##

addedOutputCols <- c(
  names(Obio@databaseTable)[grep("corCoef", names(Obio@databaseTable))],
  names(Obio@databaseTable)[grep("chromosome_name", names(Obio@databaseTable))]
  
)

if (Obio@parameterList$geneIDcolumn != "hgnc_symbol"){
    addedOutputCols <- c(
        addedOutputCols,
        "hgnc_symbol"
    )
}

createAndFormatExcelOutputFiles(
    obj = Obio,
    metaCoreCountFilter = 1,
    customOutputCols = NULL,
    addedOutputCols = addedOutputCols
)

## Done creating Excel output files                                          ##
###############################################################################

###############################################################################
## Add Covar figure                                                          ##

# figureCol is DEseq2RV or CoVar
figureCol <- "DEseq2RV"



dfDat <- unique(
    Obio@databaseTable[,c(Obio@parameterList$geneIDcolumn, "DEseq2RV", "CoVar")]
)
dfDat[["Var"]] <- dfDat[,figureCol]
dfDat <- dfDat[order(dfDat$Var, decreasing = TRUE),]

dfDat <- dfDat[dfDat$Var > 0, ]
dfDat[["CoVarOrder"]] <- 1:nrow(dfDat)

# Obio@plotCollection[["CoVar"]] <- ggplot(
#     data=dfDat,
#     aes(x=CoVarOrder, y=Var)
# ) + geom_line( ) + geom_vline(xintercept = Obio@parameterList$NtopGene, col="red"
# ) +  theme(
#     axis.text.y   = element_text(size=8),
#     axis.text.x   = element_text(size=8),
#     axis.title.y  = element_text(size=8),
#     axis.title.x  = element_text(size=8),
#     axis.line = element_line(colour = "black"),
#     panel.border = element_rect(colour = "black", fill=NA, size=1),
#     plot.title = element_text(hjust = 0.5, size = 12)
# ) + labs(title = paste0("Variation Seen Across all Genes")
# )

## Done adding Covar figure                                                  ##
###############################################################################

###############################################################################
## Add additional plot columns from database                                 ##

## Done adding additional plot columns                                       ##
###############################################################################



###############################################################################
# Do GSEA                                                                     #
###############################################################################

database.table2 <- Obio@databaseTable
rmVec <- c(
     #grep("contrast_2", names(database.table2))
     grep("contrast_P", names(database.table2)),
     grep("contrast_L", names(database.table2)),
     grep("LRT_", names(database.table2)),
     grep("PCA_", names(database.table2)),
     grep("norm_counts_", names(database.table2)),
     grep("intercept_", names(database.table2)),
     grep("r2_P", names(database.table2)),
     grep("DEseq2NormalizedReadCounts", names(database.table2)),
     grep("p_value_P", names(database.table2)),
     grep("lg2_avg_", names(database.table2))
)

if (length(rmVec) > 0){
     database.table2 <- database.table2[,-rmVec]
}
# #
# ## Remove unnecessary columns, if needed ##
# #
# ## Create GSEA rank files ##
# biologicSeqTools2::create.gsea.rnk.files(
#      Obio@parameterList$localWorkDir,
#      df.dataTable = database.table2,
#      GSEA.colum.type = "_logFC_",
#      gene.symbol.column.name = "hgnc_symbol"
#  )
# #
# # ## Remove last character from file ##
# # #truncate -s -2 file
# # #sed '$d' file # remove last line
# #
# # ## Remove last character from file ##
# # #truncate -s -2 file
# # #sed '$d' file # remove last line
# #
# ## Function to create gmt file ##

if (!is.null(Obio@parameterList$GSEAtables)){
  tables <- Obio@parameterList$GSEAtables
} else {
  tables <- c(
    "mysigdb_h_hallmarks",
    "mysigdb_c5_BP" #,
    #Obio@parameterList$lab.categories.table
  )
}


# #
dfRefGmt <- create.gmt.file.from.ref.data.table(
     host = Obio@dbDetailList$host,
     dbname = "reference_categories_db_new",
     dataTable = tables,
     pwd = db.pwd,
     user=Obio@dbDetailList$db.user,
     gene.id.column = "hgnc_symbol"
 )


###############################################################################
## If dfRefGmt is very large, reduce to most variable genes                  ##
## Define relevant genes for selection ##
relevant.genes <- as.vector(
  unique(
    Obio@databaseTable[Obio@databaseTable$cluster_order, "hgnc_symbol"]
  )
)

lr <- length(relevant.genes)

if (nrow(dfRefGmt) > 10000 & lr > 100){
  TF <- apply(dfRefGmt[,3:ncol(dfRefGmt)], 1, function(x) sum(as.vector(x) %in% relevant.genes) )
  selector <- 0
  selVec <- TF >= selector
  
  while(sum(selVec) > 10000){
    
    selVec <- TF > selector
    
    selector <- selector + 1
    
    
    if (selector > 199){
      stop()
    }
  }
  
  dfRefGmt <- dfRefGmt[selVec, ]
  print(paste0(selector, " gene cut-off set for dfRefGmt. ", sum(selVec), " categories used for GSEA per sample."))
  
  
}



## Done                                                                      ##
###############################################################################

# #
# # ###############################################################################
# # ## Save gmt file                                                             ##
# # #"/camp/stp/babs/working/boeings/Projects/reference_data/GSEA.gmt.files/20160508.rna.seq.txn.analysis.gmt"
# #
localGmtDir <- paste0(
    Obio@parameterList$localWorkDir,
    "GSEA/"
)

if (!exists(localGmtDir)){
  dir.create(localGmtDir)
}

#
gmtDir<- paste0(
    Obio@parameterList$workdir,
    "GSEA/"
)

gmtFileName <- paste0(
    Obio@parameterList$project_id,
    ".",
    "projectGmtFile.gmt"
)

dfRefGmt <- dfRefGmt[!(duplicated(dfRefGmt[,1])),]

write.table(
    dfRefGmt,
    paste0(localGmtDir, gmtFileName),
    col.names = FALSE,
    row.names = FALSE,
    sep="\t",
    quote = F
)
# #
contrasts <- names(database.table2)[grep("logFC",names(database.table2))]
contrasts <- contrasts[contrasts != "logFC_cut_off"]
contrasts

GSEAfn <- paste0(
    Obio@parameterList$localWorkDir,
    "/GSEA/GSEAcommands.sh"
)
sink(GSEAfn)

cat("module load Java/1.8.0_131");cat("\n");cat("\n")
for (i in 1:length(contrasts)){
    gmtFile <- paste0(gmtDir, gmtFileName)
    contrastNo <- unlist(strsplit(contrasts[i], "_"))[2]
    nTopPlots <- 50
    GSEAdir <- paste0(Obio@parameterList$workdir, "GSEA")
    rnkFile <- paste0(GSEAdir, "/",contrasts[i],".rnk")

    gseaCMD <- paste0(
        "sbatch --time=03:00:00 --wrap '",
        "module load Java/1.8.0_131;",
        "java -Xmx2512m -cp /camp/stp/babs/working/boeings/Projects/software/gsea-3.0.jar xtools.gsea.GseaPreranked -gmx ",
        gmtFile,
        " -rnk ",
        rnkFile,
        " -rpt_label ",
        "contrast_",
        contrastNo,
        "_rnaSeqTxnTest",
        " -out ",
        GSEAdir,
        " -collapse false -mode Max_probe -norm meandiv -nperm 1000 -scoring_scheme classic -include_only_symbols true -make_sets true -plot_top_x ",
        nTopPlots,
        " -rnd_seed timestamp -set_max 2500 -set_min 10 -zip_report false -gui false",
        "' --job-name='GSEA_",contrastNo,"' --mem=50G -o GSEA_",contrastNo,".slurm >> commands.txt"
    )
    cat(gseaCMD);cat("\n");cat("\n");


}
sink()
#
chnkVec <- as.vector(NULL, mode = "character")
plotList <- list()


###############################################################################
## Create Clusterdendrogram                                                  ##
tag <- paste0("Clusterdendrogram")

colSelVec <- c(
    alignmentGeneID,
    names(dfMainData)[grep("norm_counts", names(dfMainData))]
)

geneSelVec <- Obio@dataTableList[["Ntop4pcaGeneSelection"]]
geneSelVec <- geneSelVec[geneSelVec != duplicated(geneSelVec)]

dfData <- unique(dfMainData[, colSelVec])
dfData <- dfData[dfData[,alignmentGeneID] %in% geneSelVec, ]

row.names(dfData) <- dfData[,alignmentGeneID]
dfData[,alignmentGeneID] <- NULL
names(dfData) <- gsub("norm_counts_", "", names(dfData))
names(dfData) <- gsub("_TPM", "", names(dfData))





c <- cor(as.matrix(dfData), method="pearson")
d <- as.dist(1-c)
hr <- hclust(d, method = "ward.D", members=NULL)

plotList[[tag]] <- ggdendro::ggdendrogram(
    hr, 
    rotate = TRUE, 
    size = 4, 
    theme_dendro = FALSE, 
    color = "tomato"
    ) + ggplot2::theme_bw(
    ) +  ggplot2::theme(
        axis.text.y   = ggplot2::element_text(size=8),
        axis.text.x   = ggplot2::element_text(size=8),
        axis.title.y  = ggplot2::element_text(size=8),
        axis.title.x  = ggplot2::element_text(size=8),
        axis.line = ggplot2::element_line(colour = "black"),
        panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
        plot.title = ggplot2::element_text(hjust = 0.5, size = 12)
    )

FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
    
pdf(FN)
    print(plotList[[tag]])
dev.off()
##                                                                       ##
###########################################################################
    
figCap <- paste0(
    '**Figure ',
    figureCount,
    'Sample Dendrogram:**  Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.'
    
)
 
figureCount <- figureCount + 1
    
NewChnk <- paste0(
    paste0(
        "### Cluster Dendrogram \n"
    ),
    "\n```{r SampleDendrogram, results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
        "\n",
        "\n print(plotList[['",tag,"']])",
            "\n cat(  '\n')",
            "\n\n\n```\n"   
)

chnkVec <- c(
    chnkVec,
    NewChnk
)



## Done creating clusterdendrogram                                           ##
###############################################################################


###########################################################################
## Add Coefficient of variation plot                                     ##

if (length(grep("CoVar", names(dfMainData))) > 0){
    tag <- "CoVar_Plot"
    
    figureCol = "DEseq2RV"
    dfDat <- unique(
        dfMainData[,c( geneIDcolumn, "DEseq2RV", "CoVar")]
    )
    dfDat[["Var"]] <- dfDat[,figureCol]
    dfDat <- dfDat[order(dfDat$Var, decreasing = TRUE),]

    dfDat <- dfDat[dfDat$Var > 0, ]
    dfDat[["CoVarOrder"]] <- 1:nrow(dfDat)
    
    

    if (!exists("NtopGene")){
         NtopGene <- length(mostVarIDs)
    }

    plotList[[tag]] <- ggplot2::ggplot(
    data=dfDat,
    ggplot2::aes(x=CoVarOrder, y=Var)
) + ggplot2::geom_line( ) + ggplot2::geom_vline(xintercept = NtopGene, col="red"
) +  ggplot2::theme_bw() + ggplot2::theme(
    axis.text.y   = ggplot2::element_text(size=8),
    axis.text.x   = ggplot2::element_text(size=8),
    axis.title.y  = ggplot2::element_text(size=8),
    axis.title.x  = ggplot2::element_text(size=8),
    axis.line = ggplot2::element_line(colour = "black"),
    panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
    plot.title = ggplot2::element_text(hjust = 0.5, size = 12)
) + ggplot2::labs(title = paste0("Variation Seen Across all Genes")
)

    ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0("CoVar", VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)

    pdf(FN)
        print(plotList[[tag]])
    dev.off()
    ##                                                                       ##
    ###########################################################################

    link <- paste0(
              'An interactive version of this figure can be found ',
              '<a href="https://', urlString,'/',Obio@projectDetailList$project_id,'/scatterplot?x_axis=CoVarOrder&y_axis=CoVar&headline=2D+Scatterplot" target="_blank">here</a>', '. ')  
    
    figLegend <- paste0(
        '**Figure ', 
        figureCount, 
        ':** ',
        ' Coefficient of variation per gene. The red line indicates the cut-off for the most variable genes in this experiment. The most variable genes are the basis for the PCA analysis and heatmap displays. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. ',
     link
)
    
    

    figureCount <- figureCount + 1

    NewChnk <- paste0(
        paste0("### Coefficient of Variation \n"),
            "\n```{r CoVarPlot, results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figLegend,"'}\n",
            "\n",
            "\n print(plotList[['",tag,"']])",
            "\n cat(  '\n')",
            "\n\n\n```\n"
    )

    chnkVec <- c(
        chnkVec,
        NewChnk
    )


}

## Done adding coefficient of variation                                  ##
###########################################################################
if (length(plotList) > 2){
    tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
    tabVar <- ".tabset .tabset-fade .tabset-pills"
}

Sample Characterization

Cluster Dendrogram

Figure 1Sample Dendrogram: Download a pdf of this figure here.

Coefficient of Variation

Figure 2: Coefficient of variation per gene. The red line indicates the cut-off for the most variable genes in this experiment. The most variable genes are the basis for the PCA analysis and heatmap displays. Download a pdf of this figure here. An interactive version of this figure can be found here.

###############################################################################
## Add PCA plot                                                              ##
if (!exists("project_id")){
    project_id <- gsub("_designTable", "", designTB)
}


if (!exists("VersionPdfExt")){
    VersionPdfExt <- paste0(".V", gsub("-", "", Sys.Date()), ".pdf")
}

if (!exists("labname")){
    labname <- "TBD"
}

if (!exists("reportFigDir") || is.null(reportFigDir)){
    reportFigDir <- ""
}

chnkVec <- as.vector(NULL, mode = "character")
plotList <- list()
tag <- "PCAvariationPerDimension"


if (exists("Obio")){
    pos <- grep("PCApercentVar", slotNames(Obio))
    if (!is.null(Obio@PCApercentVar)){
        PCApercentVar <- Obio@PCApercentVar
    }
} else {
    PCApercentVar <- NULL
}


## Use custom PCA colors if specified ##

## Just in case we still have dots instead of underscores
names(dfPCA) <- gsub("\\.", "_", names(dfPCA))
pcaSampleGroups <- unique(sort(dfPCA$sample_group))

## If sample.group colors are set use those, otherwise set default.
pos <- grep("^sample.group_color$", names(dfDesign))

if (length(pos) == 0){
    ## Create default ##
    sample.group <- unique(dfDesign$sample.group)
    sample.group_color <- sample.group
            #library(scales)
    sample.group_color = scales::hue_pal()(length(sample.group_color))
            #sample.group_color = c("#990000", "#009900")
    
    ## set sample group colors manually
    
    dfGroupColors <- unique(data.frame(sample.group, sample.group_color))
    dfDesign <- merge(dfDesign, dfGroupColors, by.x = "sample.group", "sample.group")
    
}

dfColor <- unique(
        Obio@dfDesign[,c("sample.group", "sample.group_color")]
)

if (nrow(dfColor) == length(pcaSampleGroups)){
  
    namedColors <- dfColor$sample.group_color
    names(namedColors) <- dfColor$sample.group
  
    plotList[[tag]] <- ggplot2::ggplot(
        data = dfPCA,
        ggplot2::aes(x=PC1, y=PC2, fill = sample_group)
    ) + ggplot2::geom_vline(xintercept = 0, color = "grey", size=0.5
    ) + ggplot2::geom_hline(yintercept = 0, color = "grey", size=0.5
    ) + ggplot2::geom_point(
        size=2,
        shape = 21
    ) + ggplot2::scale_fill_manual("Sample Groups", values = namedColors
    )
} else {
    plotList[[tag]] <- ggplot2::ggplot(
        data = dfPCA,
        ggplot2::aes(x=PC1, y=PC2, fill = sample_group)
    ) + ggplot2::geom_vline(xintercept = 0, color = "grey", size=0.5
    ) + ggplot2::geom_hline(yintercept = 0, color = "grey", size=0.5
    ) + ggplot2::geom_point(
        size=2,
        shape = 21
    ) 
}




if (!is.null(PCApercentVar)){
    plotList[[tag]] <- plotList[[tag]] + ggplot2::labs(
        title = "PCA Plot", 
        x = paste0("PC1 \n ",round(100* Obio@PCApercentVar[1]),"% variability explained"),
        y = paste0("PC2 \n ",round(100* Obio@PCApercentVar[2]),"% variability explained")
    )
} else {
    plotList[[tag]] <- plotList[[tag]] + ggplot2::labs(
        title = "PCA Plot", 
        x = paste0("PC1"),
        y = paste0("PC2")
    )
}

plotList[[tag]] <- plotList[[tag]] +  ggplot2::theme_bw() + ggplot2::theme(
        axis.text.y   = ggplot2::element_text(size=8),
        axis.text.x   = ggplot2::element_text(size=8),
        axis.title.y  = ggplot2::element_text(size=12),
        axis.title.x  = ggplot2::element_text(size=12),
        axis.line = ggplot2::element_line(colour = "black"),
        panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
        plot.title = ggplot2::element_text(hjust = 0.5, size = 12)
)


###########################################################################
## Save plot to file                                                     ##
FNbase <- paste0("PCA12", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
FNrelT <- paste0("report_tables/", FNbase)
    
pdf(FN)
    print(plotList[[tag]])
dev.off()
##                                                                       ##
###########################################################################
    


link <- paste0('<a href="https://biologic.crick.ac.uk/',project_id,'/pca?x_axis=PC1&y_axis=PC2', '" target="_blank">here</a>')

figCap <- paste0(
    "**Figure ",
    figureCount,
    ":** Variation in the first two PCA Dimensions. Download a pdf of this figure [here](", FNrel, "). ",
    "Further PCA dimensions are available interacively ", link, ". " 
)
 
figureCount <- figureCount + 1
    
NewChnk <- paste0(
paste0("### PCA_Plot \n"),
            "\n```{r ReferencePCA1, results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
            "\n",
            "\n print(plotList[['",tag,"']])",
            "\n cat(  '\n')",
            "\n\n\n```\n"   
)

chnkVec <- c(
    chnkVec,
    NewChnk
)

            

## Done with PCA plot                                                        ##
###############################################################################
###############################################################################
## Add Variation estimate plot                                               ##

if (length(unique(dfDesign$dataseries)) > 1){
    independentDesignColSector <- c(
        "dataseries"
    )
} else {
    independentDesignColSector <- as.vector(NULL, mode = "character")
}

if (length(dfDesign$sample.group) > 1){
    independentDesignColSector <- c(
        independentDesignColSector,
        "sample.group"
    )
}

if (length(dfDesign$replicate) > 1){
    independentDesignColSector <- c(
        independentDesignColSector,
        "replicate"
    )
}

pos <- grep("^f_", names(dfDesign))
if (length(pos) > 0){
  independentDesignColSector <- c(
    names(dfDesign)[grep("^f_", names(dfDesign))]
  )

} else if (length(unique(dfDesign$dataseries)) > 1){
    independentDesignColSector <- c(
      "dataseries"
    )
}

###############################################################################
## Create independent variations plot                                        ##

designColSelector = unique(c(independentDesignColSector, "sample.id"))    

if (length(unique(dfDesign$sample.id)) > 42) {
    rld <- vst(dds)    
} else {
    rld <- rlog(dds)    
}
rv = rowVars(assay(rld))

## Select most variable genes
select = order(rv, decreasing = TRUE)[seq_len(length(mostVarIDs))]
dfTemp = t(assay(rld)[select, ])

pc <- prcomp(dfTemp, center=TRUE, scale=FALSE)

colDatMin = unique(dfDesign[, designColSelector])
rownames(colDatMin) = as.vector(colDatMin$sample.id)

colDatMin$sample.id <- NULL
#colnames(colData)[1] = "condition"

###############################################################################
## Get PCA Loadings                                                          ##

dfBase <- t(dfTemp)
pcaGenes = prcomp(scale(dfBase))

dfPcaGenes = data.frame(pcaGenes$x)

if (ncol(dfPcaGenes) > 10){
    dfPcaGenes <- dfPcaGenes[,1:10]
}

dfPcaGenes[[ alignmentGeneID]] <- row.names(dfPcaGenes)

#Obio@dfPCAgenes <- dfPcaGenes
dfPcaGenes <- Obio@dfPCAgenes

## Retrieve pca loadings from previous step


## Add Gene Annotation
dfAnno <- unique(Obio@dfGeneAnnotation[,c( alignmentGeneID, geneIDcolumn)])
dfAnno <- dfAnno[dfAnno[,alignmentGeneID] %in% dfPcaGenes[,alignmentGeneID], ]
dfLoad <- merge(
    dfAnno, 
    dfPcaGenes, 
    by.x = alignmentGeneID, 
    by.y = alignmentGeneID, 
    all = TRUE
)

dfLoad[is.na(dfLoad)] <- 0
dfLoad[dfLoad[,geneIDcolumn] == 0, geneIDcolumn] <- dfLoad[dfLoad[,geneIDcolumn] == 0, alignmentGeneID]

## Make Loadings Plot ##
## Plot ##
selXY <- c("contrast_P_PCA_estimatePCA1", "contrast_P_PCA_estimatePCA2", geneIDcolumn)
dfSel <- unique(dfLoad[,selXY])
#row.names(dfSel) <- dfSel$gene
dfSel[["highlight"]] <- ""
dfSel[["cat"]] <- ""
dfSel[["selX"]] <- ""
dfSel[["selY"]] <- ""
dfSel <- dfSel[order(dfSel[,selXY[1]], decreasing = FALSE), ]
dfSel[1:15, "highlight"] <- "+"
    
## Use two standard deviations for enrichment ##
twoSD <- 2*sd(dfSel[,selXY[1]])
twoSDxLine <- 2*sd(dfSel[,selXY[1]])
gSvec <- dfSel[dfSel[,selXY[1]] < -1* twoSD, geneIDcolumn]
    

dfSel <- dfSel[order(dfSel[,selXY[1]], decreasing = TRUE), ]
dfSel[1:15, "highlight"] <- "+"
gSvec <- dfSel[dfSel[,selXY[1]] >  twoSD, geneIDcolumn]
    
    
    
    ## Now dim 2
    dfSel <- dfSel[order(dfSel[,selXY[2]], decreasing = FALSE), ]
    dfSel[1:15, "highlight"] <- "+"
    
    twoSD <- 2*sd(dfSel[,selXY[2]])
    twoSDyLine <- 2*sd(dfSel[,selXY[2]])
    gSvec <- dfSel[dfSel[,selXY[2]] < -1* twoSD, geneIDcolumn]
    
    
    
    
    dfSel <- dfSel[order(dfSel[,selXY[2]], decreasing = TRUE), ]
    dfSel[1:15, "highlight"] <- "+"
    gSvec <- dfSel[dfSel[,selXY[2]] >  twoSD, geneIDcolumn]
    
    dfSel[["label"]] <- ""
    dfSel[dfSel$highlight == "+", "label"] <- dfSel[dfSel$highlight == "+", geneIDcolumn]
    
    ## Done
    tag <- "PCA_Loadings"
    
    colVec <- c("grey", "black")
    names(colVec) <- c("", "Selected")
    
    plotList[[tag]] <- ggplot2::ggplot(data=dfSel, aes_string(x=selXY[1],y=selXY[2], label="label")
    ) + geom_vline(xintercept = 0, color = "grey", size=0.5
    ) + geom_hline(yintercept = 0, color = "grey", size=0.5
    ) + geom_vline(xintercept = c(twoSDxLine, -1* twoSDxLine), color = "red", lty=2,size=0.5
    ) + geom_hline(yintercept = c(twoSDyLine, -1* twoSDyLine), color = "red", lty=2,size=0.5
    ) + geom_hline(yintercept = 0, color = "grey", size=0.5
    ) + geom_point(col="black") + scale_color_manual(values=colVec
    #) + ggtitle(paste0("PCA - Cell Level")
    ) + theme_bw(
    ) +  theme(
        #axis.text.y   = element_blank(), # element_text(size=8),
        #axis.text.x   = element_blank(), #element_text(size=8),
        #axis.title.y  = element_blank(), #element_text(size=8),
        #axis.title.x  = element_blank(), #element_text(size=8),
        axis.line = element_line(colour = "black"),
        legend.position = "none",
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        #plot.title = element_text(hjust = 0.5, size = 12)
    )  #+ guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize)))
    
    #points <-  as.vector(unique(dfSel[dfSel$highlight=="+", geneIDcolumn]))
    #plotListGene[[tag]] <- LabelPoints(plot = plotListGene[[tag]], points =points, repel = TRUE, xnudge = 0, ynudge = 0)
    
    plotList[[tag]] <-  plotList[[tag]]  + ggrepel::geom_text_repel(size = 3)
    
    
    ## Save to file ##
    FNbase <- paste0(tag, ".", selXY[1],".", selXY[2], VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        plot(plotList[[tag]])
    dev.off()

png 2

    # dim1 <- gsub("PC_", "", xVec[i])
    # dim2 <- gsub("PC_", "", yVec[i])
    link <- paste0(
        '<a href="https://',urlString,'/',
        project_id,
        '/scatterplot?x_axis=contrast_P_PCA_estimatePCA1&y_axis=contrast_P_PCA_estimatePCA2&highlight_gene=&cat_id=ag_lab_categories__10',
        '" target="_blank">here</a>'
    )
    
    figCap <- paste0(
        "**Figure, " ,figureCount,":**Gene-level PCA plot for dimensions ", selXY[1], " and ", selXY[2], ". ",
        ". An interactive version of this figure can be found ", link, ". "
    )
   
    
    NewChnk <- paste0(
        "### PCA_Loadings \n",
        "\n```{r PCA_gene_level , results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
        figCap,"'}\n",
        "\n",
        "\n print(plotList[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
    ## Done with genes                                                       ##
    ###########################################################################
    figureCount <- figureCount + 1


###############################################################################
## Add percent variaton per dimension plot                                   ##

tag <- "Variation_Per_PCA_Dimension"
    
## Add percent variation plot ##
PercentVariation <- round(100*Obio@PCApercentVar,1)
PCdimension <- paste0("PC", 1:length(PercentVariation))  
    
df <- data.frame(
    PercentVariation,
    PCdimension
)

legendString <- ""
if (nrow(df) > 20){
    legendString <- paste0("Only the first 20 principal components out of ",nrow(df)," are shown in the figure. ")
    df <- df[1:20,]
    PCdimension <- PCdimension[1:20]
    
}

df <- df[df$PercentVariation > 0,]

plotList[[tag]] <- ggplot(
    df, 
    aes(PCdimension, PercentVariation)
) + geom_col(
) + scale_x_discrete(limits=PCdimension) +  theme(
    axis.text.y   = element_text(size=8),
    axis.text.x   = element_text(size=8),
    axis.title.y  = element_text(size=8),
    axis.title.x  = element_text(size=8),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA, size=1),
    plot.title = element_text(hjust = 0.5, size = 12)
) + theme_bw()
   
###########################################################################
## Save plot to file                                                     ##
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
    
pdf(FN)
    plotList[[tag]]
dev.off()

png 2

##                                                                       ##
###########################################################################
    
figCap <- paste0(
    '**Figure ',
    figureCount,
    ':** Percent of variaton explained by each principal component. ',
    legendString,
    'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
)
 
figureCount <- figureCount + 1
   
NewChnk <- paste0(
    "### Amount of variation explained by each PCA Dimension ",
    "\n```{r, results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
    "\n",
    "\n print(plotList[['",tag,"']])",
    "\n cat(  '\n')",
    "\n\n\n```\n"   
)

chnkVec <- c(
        chnkVec,
        NewChnk
)
       
    
     
## Done                                                                      ##
###############################################################################





covar_PC_frame <- rbind(
    data.frame(
        Component=1:(nrow(pc$x)-1),
        spread(
            data.frame(
                v=names(colDatMin),
                val=NA_real_
            ),
            key=v, 
            value=val
        )
    )
)


tag <- "independentVariation"

#if (mode == "independentVariation"){        
    covar_PC_frame <- covar_PC_frame[c("Component", names(colDatMin))]
            for (i in 1:nrow(covar_PC_frame)) {
                ## old code from gavin below ##
                fit <- lm(pc$x[,i]~., data=colDatMin)
                covar_PC_frame[i,-1] <- drop1(fit, test="F")[names(covar_PC_frame)[-1],"Pr(>F)"]
                
                ## replaced 25032019 ##
                # Fit each variable individually @
            }
    
    
    plotFrame <- gather(covar_PC_frame, key=Covariate, value=p, -Component)
        plotFrame <- plotFrame[order(plotFrame$Component, decreasing = FALSE),]
        
        if (nrow(plotFrame) > 20) {
            plotFrame <- plotFrame[1:(length(names(colDatMin)) * 20), ]
        }
        
        ## Cut to 10 dimensions ##
        
        plotList[[tag]] <- ggplot(
            plotFrame, 
            aes(x=Component, y=Covariate, fill=-log10(p))) +
            geom_raster() +
            scale_fill_gradient(low="grey90", high="red") +
            theme_classic() + 
            coord_fixed() +
            scale_x_continuous( labels = unique(plotFrame$Component), breaks = unique(plotFrame$Component)
            )
        
        ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0("Independent.variation.per.pca.dimension", VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotList[[tag]])
    dev.off()

png 2

    ##                                                                       ##
    ###########################################################################
    
    figCap <- paste0(
        '**Figure ',
        figureCount,
        ':** Independent sources of Variation per principal component. ',
        'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
    )
 
    figureCount <- figureCount + 1
   
    NewChnk <- paste0(
            "### Independent Source of Variation Per PCA Component ",
            "\n```{r var-per-pca-independent, results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
            "\n",
            "\n print(plotList[['",tag,"']])",
            "\n cat(  '\n')",
            "\n\n\n```\n"   
    )

    chnkVec <- c(
        chnkVec,
        NewChnk
    )
        
###############################################################################
## Now the plot tolarating dependent variations                              ##

tag <- "dependentVariation"
    
dependentDesignColSelector<- c(
    independentDesignColSector,
    names(Obio@dfDesign)[grep("comp_", names(dfDesign))],
    names(Obio@dfDesign)[grep("LRT_", names(dfDesign))]
)    
    
covar_PC_frame <- rbind(
            data.frame(
                Component=1:(nrow(pc$x)-1),
                spread(
                    data.frame(
                        v=names(colDatMin),
                        val=NA_real_
                    ),
                    key=v, 
                    value=val
                )
            )
        )

mode <- "dependentVariation"
    ## Do fitting individually ##
    ## Check that all selVec entries exist
        fitVars <- names(covar_PC_frame)
        fitVars <- fitVars[fitVars != "Component"]
        covar_PC_frame <- covar_PC_frame[c("Component", names(colDatMin))]

        for (i in 1:nrow(covar_PC_frame)) {
            ## old code from gavin below ##
            for (j in 1:length(fitVars)){
                corVar <- fitVars[j]

                if (length(unique(dfDesign[, corVar])) > 1) {
                    pcDim <- paste0("pc$x[,",i,"]")
                    regressionFormula <- as.formula(paste(pcDim, corVar, sep="~"))
                    fit <- lm(regressionFormula, data=colDatMin)
                    pVal <- as.vector(summary(fit)$coefficients[,4][2])
                    covar_PC_frame[i, corVar] <- pVal
                }
            }
        }    
        
    plotFrame <- gather(covar_PC_frame, key=Covariate, value=p, -Component)
    plotFrame <- plotFrame[order(plotFrame$Component, decreasing = FALSE),]

    if (nrow(plotFrame) > 20) {
        plotFrame <- plotFrame[1:(length(names(colDatMin)) * 20), ]
    }
    
       
    ## Cut to 10 dimensions ##
    
    plotList[[tag]] <- ggplot(
        plotFrame, 
        aes(x=Component, y=Covariate, fill=-log10(p))) +
        geom_raster() +
        scale_fill_gradient(low="grey90", high="red") +
        theme_classic() + 
        coord_fixed() +
        scale_x_continuous( labels = unique(plotFrame$Component), breaks = unique(plotFrame$Component)
        ) +  ggplot2::theme(
        axis.text.y   = ggplot2::element_text(size=8),
        axis.text.x   = ggplot2::element_text(size=8),
        axis.title.y  = ggplot2::element_text(size=8),
        axis.title.x  = ggplot2::element_text(size=8),
        axis.line = ggplot2::element_line(colour = "black"),
        panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
        plot.title = ggplot2::element_text(hjust = 0.5, size = 12)
    ) + ggplot2::labs(title = "Independent Sources of Variation per PCA Component")
        
        ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0("Dependent.permissive.variation.per.pca.dimension", VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        plotList[[tag]]
    dev.off()

png 2

    ##                                                                       ##
    ###########################################################################
    
    figCap <- paste0(
        '**Figure ',
        figureCount,
        ':** Dependent-tolerant sources of Variation per principal component. ',
        'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
    )
 
    figureCount <- figureCount + 1
   
    NewChnk <- paste0(
            "### Dependent-tolerant Source of Variation Per PCA Component ",
            "\n```{r, results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
            "\n",
            "\n print(plotList[['",tag,"']])",
            "\n cat(  '\n')",
            "\n\n\n```\n"   
    )

    chnkVec <- c(
        chnkVec,
        NewChnk
    )
       
    
    
## Done                                                                      ##
###############################################################################
        

## Add PCA loadings

    ## Add genes driving this PCA dimension ## 
#     if (!is.null(Obio@plotCollection$PCA1_PCA_fitting)){
#         
#         pFit <- Obio@plotCollection$PCA1_PCA_fitting
#         
#         
#         ###########################################################################
#         ## Save plot to file                                                     ##
#         FNbase <- paste0("Variation.per.pca.dimension.", VersionPdfExt)
#         FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
#         FNrel <- paste0("report_figures/", FNbase)
#     
#         pdf(FN)
#             print(pFit)
#         dev.off()
#         ##                                                                       ##
#         ###########################################################################
#         link <- paste0(
#             "https://biologic.crick.ac.uk/",
#             project_id,
#             "/scatterplot?x_axis=contrast_P_PCA_estimatePCA1&y_axis=contrast_P_lg10p_PCA1&highlight_gene=&cat_id=ag_lab_categories__10")
#     figCap <- paste0(
#         "**Figure ",
#         figureCount,
#         ":** Genes driving principal components. ",
#         "Download a pdf of this figure [here](", FNrel, "). ",
#         "Genes driving this - and other PCA dimensions can be accessed interactively [here](", link, "). " 
#     )
#  
#     figureCount <- figureCount + 1
#    
#     NewChnk <- paste0(
#             "### Genes Driving PCA Components ",
#             "\n```{r var-per-pca-genes, results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
#             "\n",
#             "\n print(pFit)",
#             "\n cat(  '\n')",
#             "\n\n\n```\n"   
#     )
#     chnkVec <- c(
#         chnkVec,
#         NewChnk
#     )
#         
#     }
# }
## Done adding PCA plots                                                     ##
###############################################################################
# if (length(plotList) > 2){
#     tabVar <- ".tabset .tabset-fade .tabset-dropdown"
# } else {
#     tabVar <- ".tabset .tabset-fade .tabset-pills"
# }

Principal Component Analysis (PCA)

A birds eye view of your data can be obtained by looking at the results of the principal component analysis (PCA). The principal component analysis looks at your count dataset as a whole and determines how ‘close’ two samples are in terms of overall data structure. First of all, you want your replicated to cluster together. After that, you will be able to determine how different various sets of sample groups are.

A more detailed explanation on PCA is give in this PCA video.

PCA_Plot

Figure 3: Variation in the first two PCA Dimensions. Download a pdf of this figure here. Further PCA dimensions are available interacively here.

PCA_Loadings

Figure, 4:Gene-level PCA plot for dimensions contrast_P_PCA_estimatePCA1 and contrast_P_PCA_estimatePCA2. . An interactive version of this figure can be found here.

Amount of variation explained by each PCA Dimension

Figure 5: Percent of variaton explained by each principal component. Only the first 20 principal components out of 238 are shown in the figure. Download a pdf of this figure here.

Independent Source of Variation Per PCA Component

Figure 6: Independent sources of Variation per principal component. Download a pdf of this figure here.

Dependent-tolerant Source of Variation Per PCA Component

Figure 7: Dependent-tolerant sources of Variation per principal component. Download a pdf of this figure here.

## Make heatmap gene list
logFCselections <- names(dfMainData)[grep("_logFC_", names(dfMainData))]
padjSelections <- gsub("_logFC_", "_padj_", logFCselections)
dfSelections <- data.frame(logFCselections, padjSelections)
dfSelections <- dfSelections[dfSelections[,"padjSelections"] %in% names(dfMainData),]
if (nrow(dfSelections) > 2){
    tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
    tabVar <- ".tabset .tabset-fade .tabset-pills"
}
## Make heatmap gene list
logFCselections <- names(dfMainData)[grep("_logFC_", names(dfMainData))]
padjSelections <- gsub("_logFC_", "_padj_", logFCselections)
dfSelections <- data.frame(logFCselections, padjSelections)
dfSelections <- dfSelections[dfSelections[,"padjSelections"] %in% names(dfMainData),]
if (!exists("project_id")){
    project_id <- gsub("_designTable", "", designTB)
}
if (!exists("VersionPdfExt")){
    VersionPdfExt <- paste0(".V", gsub("-", "", Sys.Date()), ".pdf")
}
if (!exists("labname")){
    labname <- "TBD"
}
if (!exists("reportFigDir") || is.null(reportFigDir)){
    reportFigDir <- ""
}
###############################################################################
## First heatmap: Most variable genes                                        ##
HMplotList <- list()
chnkVec <- as.vector(NULL, mode="character")
if (geneIDcolumn == "mgi_symbol" | geneIDcolumn == "hgnc_symbol"){
    geneSelCol <- geneIDcolumn
} else {
    geneSelCol <- "hgnc_symbol"
}
# if (is.null(HmDisplayCatsFromDb)){
    HmDisplayCatsFromDb <- list()
# }
    
## Start with Nmost variable genes ##
if (exists("Ntop4pcaGeneSelection") && !is.null(Ntop4pcaGeneSelection) | 
    length(Ntop4pcaGeneSelection) > 3){
    dfDataTable <- dfMainData
    
    geneVec <- as.vector(unique(dfDataTable[dfDataTable[,alignmentGeneID] %in% Ntop4pcaGeneSelection,geneIDcolumn]))
} else {
    geneVec <- unique(dfMainData[dfMainData$logFC_cut_off == 1, geneIDcolumn])
    Ntop4pcaGeneSelection <- geneVec
}
cat.name <- paste0("Experiment_",project_id, "_",length(Ntop4pcaGeneSelection),"_most_variable_genes")
cat.description.text <- paste0(
    "In this gene set the ",
    length(geneVec),
    " most variable genes from ",
    labname,
    " lab experiment \\<a href=\\'https:\\/\\/biologic.crick.ac.uk\\/",
    project_id,"\\'\\>",project_id, "\\<\\/a\\> are compiled."
)
HmDisplayCatsFromDb[[cat.name]] <- list(
      "cat_type" = paste0("temp_", project_id),
      "data_source" = paste0(labname, " Lab") ,
      "cat.description.text" = cat.description.text,
      "geneVec" = geneVec,
      "catID" = NULL,
      "comparisonID" = NULL
)
###########################################################################
## Make one heatmap per comparison                                       ##
    
numextract <- function(string){ 
    stringr::str_extract(string, "contrast_\\-*\\d+\\.*\\d*_")
} 

dfSelections[["designColumn"]] <- sapply(dfSelections$padjSelections, function(x) unlist(strsplit(x, "padj_"))[2]) 

## Get design column from model file ##
designColNames <- sapply(dfSelections$padjSelections, function(x) unlist(strsplit(x, "padj_"))[2])

modelComp <- as.vector(dfModel$comparison)
designColNames[!(designColNames %in% modelComp)] <- ""

dfModelSel <- dfModel[dfModel$comparison %in% designColNames,]

dfSelections[["designColumn"]] <- ""

    if (nrow(dfModelSel) > 0){
    ## replace all entries found in dfModel to comparisonID
    for (i in 1:nrow(dfModelSel)){
        designColNames <- gsub(paste0("^", as.vector(dfModel[i, "comparison"]), "$"), as.vector(dfModel[i, "comparisonID"]),designColNames )
    }
    
    dfSelections[["designColumn"]]  <-  designColNames 
} 



for (k in 1:nrow(dfSelections)){
    dfDataTable <- dfMainData
    padjCutOff <- 0.05
        
    geneVec <- as.vector(
        unique(
            dfDataTable[dfDataTable[,as.vector(dfSelections$padjSelections[k])] < 0.05 & dfDataTable[,as.vector(dfSelections$logFCselections[k])] != 0,geneIDcolumn]
            )
    )
        
    if (length(geneVec) > 1500){
        padjCutOff <- 0.01
        
        geneVec <- as.vector(
            unique(
                dfDataTable[dfDataTable[,as.vector(dfSelections$padjSelections[k])] < 0.01 & 
                                    dfDataTable[,as.vector(dfSelections$logFCselections[k])] != 0,geneIDcolumn
                ]
            )
        )
    }
        
    ## Insert gene set into database ##
    cat.name <- paste0(
        "Experiment_",project_id, "_",dfSelections$padjSelections[k],"_smaller_than_", gsub("[.]", "_", padjCutOff)
    )
    
    cat.description.text <- paste0(
        "In this gene set the genes that exhibited an adjusted p value of less than ", 
        padjCutOff, 
        " in the differential gene expression comparsion ", 
        as.vector(dfSelections$logFCselections[k]),
        " in ",
        labname,
        " lab experiment \\<a href=\\'https:\\/\\/biologic.crick.ac.uk\\/",project_id,"\\'\\>",project_id, "\\<\\/a\\> are compiled."
      )
        
      comparisonID <- as.vector(dfSelections[k, "designColumn"])
      if (comparisonID == ""){
          comparisonID <- NULL
      }
    
      HmDisplayCatsFromDb[[cat.name]] <- list(
          "cat_type" = paste0("temp_", project_id),
          "data_source" = paste0(labname, " Lab") ,
          "cat.description.text" = cat.description.text,
          "geneVec" = geneVec,
          "catID" = NULL,
          "comparisonID" = comparisonID
      )
            
}
## Done with making heatmap list                                         ##
###########################################################################

[1] “Retrieved category: Transcription_Factors_MC” [1] "Retrieved category ID: ag_lab_categories__10" [1] “Retrieved category: LIANA_Ligand_Reference_2024” [1] "Retrieved category ID: sc_lab_categories__1597" [1] “Retrieved category: LIANA_Receptor_Reference_2024” [1] "Retrieved category ID: sc_lab_categories__1598" [1] “Retrieved category: GO_CYTOKINESIS” [1] "Retrieved category ID: mysigdb_c5_BP__3745" [1] “Retrieved category: GO_PROTEIN_KINASE_ACTIVITY” [1] "Retrieved category ID: mysigdb_c5_MF__169"

## Create Heatmaps ##
###############################################################################
## Reorder Obio@parameterList$HmDisplayCatsFromDb so that 500 var is on top  ##
pos <- grep("most_variable_genes", names(HmDisplayCatsFromDb))
if (length(pos) > 0){
  pos <- pos[1]
  newOrder <- c(
    names(HmDisplayCatsFromDb)[pos],
    names(HmDisplayCatsFromDb)[-pos]
  )
  HmDisplayCatsFromDb <- HmDisplayCatsFromDb[newOrder]
}
##                                                                           ##
###############################################################################



## Begin heatmap plotting loop ##
for (k in 1:length(HmDisplayCatsFromDb)){
    
    ## Select samples to display ##
    if (!is.null(HmDisplayCatsFromDb[[k]]$comparisonID)){
        dfSel <- unique(dfDesign[,c("sample.id", HmDisplayCatsFromDb[[k]]$comparisonID)])
        dfSel <- dfSel[dfSel[,HmDisplayCatsFromDb[[k]]$comparisonID] != "",]
        
        if (nrow(dfSel) > 1){
            sampleSelection <- paste0("norm_counts_", unique(dfSel$sample.id))    
        } else {
            sampleSelection <- paste0("norm_counts_", unique(dfDesign$sample.id))
        }
        
    } else {
        sampleSelection <- paste0("norm_counts_", unique(dfDesign$sample.id))
    }
  
    ## Check ##
     
    sampleSelection <- unique(names(dfMainData)[unlist(sapply(paste0("^", sampleSelection, "$"), function(x) grep(x, names(dfMainData))))])
    selVec <- c(geneIDcolumn, sampleSelection )
    ## Get gene selection 
    geneSel <- HmDisplayCatsFromDb[[k]]$geneVec
    
    geneSel <- unique(geneSel)
    geneSel <- geneSel[geneSel != ""]
    
    if (length(geneSel) > 2){
        dfDataTable <- dfMainData
        dfDataTable <- unique(dfDataTable[dfDataTable[, geneIDcolumn] %in% geneSel, selVec])
        
        dfHmBase <- unique(dfDataTable[,selVec])
        
        while (sum(duplicated(dfHmBase[, geneIDcolumn])) > 0){
            dfHmBase[duplicated(dfHmBase[, geneIDcolumn]), geneIDcolumn] <- paste0(
                dfHmBase[duplicated(dfHmBase[, geneIDcolumn]), 
                geneIDcolumn], "_", i
            )
            i=i+1
        }
        
        row.names(dfHmBase) <- dfHmBase[, geneIDcolumn]
        dfHmBase[, geneIDcolumn] <- NULL
        
        ## calculate row-means ##
        rowMeans <- apply(
            dfHmBase,
            1,
            function(x) mean(x)
        )
            
        rowMeans[rowMeans ==0] <- 0.001
            
        hmMax <- 4
        for (i in 1:ncol(dfHmBase)){
            dfHmBase[,i] <- log2(dfHmBase[,i] / rowMeans)
        }
            
        dfHmBase[dfHmBase > hmMax] <- hmMax
        dfHmBase[dfHmBase < -1*hmMax] <- -1*hmMax
            
            
        names(dfHmBase) <- gsub("norm_counts_", "", names(dfHmBase))
        names(dfHmBase) <- gsub("_TPM", "", names(dfHmBase))
            
        mHmBase <- data.matrix(dfHmBase)
            
        if ( nrow(mHmBase) < 201){
            showRowNames <- TRUE
        } else {
            showRowNames <- FALSE
        }
        
        ## Create heatmap plot ##
        #library(ComplexHeatmap)
       
        f1 = circlize::colorRamp2(seq(-4, 4, length = 3), c("#3060cf", "#fffbbc","#c4463a"))    
    
        anno <- as.data.frame(colnames(mHmBase))
        colnames(anno) <- "Sample"
        anno$Group <- sapply(as.vector(anno[,1]), function(x) paste0(unlist(strsplit(x, "_"))[1], "_",unlist(strsplit(x, "_"))[2]))
        
        ## Color sample groups in line with the designated sample group color ##
        #######################################################################
        ## Add sample group colors if needed
        pos <- grep("sample.group_color", names(dfDesign))
        
        if (length(pos) == 0){
            sample.group <- unique(dfDesign$sample.group)
            sample.group_color <- sample.group
            #library(scales)
            sample.group_color = scales::hue_pal()(length(sample.group_color))
            #sample.group_color = c("#990000", "#009900")
            dfGroupColors <- unique(data.frame(sample.group, sample.group_color))
            dfDesign <- merge(dfDesign, dfGroupColors, by.x = "sample.group", "sample.group")
            if (exists("Obio")){
                Obio@dfDesign <- dfDesign
            }
            
        }
        
        
        
        #library(scales)
        #hue_pal()(2)
        df <- unique(data.frame(dfDesign[,c("sample.id", "sample.group", "sample.group_color")]))
        df <- df[df$sample.id %in% colnames(mHmBase),]
        df2 <- data.frame(df[,"sample.group"])
        names(df2) <- "Group"
        
                
        GroupVec <- as.vector(df$sample.group_color)
        names(GroupVec) <- as.vector(df$sample.group)
        
        
        
        #df2 <- unique(data.frame(Obio@dfDesign[,c("sample.id","sample.group", "sample.group_color")]))
        #df2 <- data.frame(df2[,c("sample.group")])
        
        
        
        ha = ComplexHeatmap::HeatmapAnnotation(df = df2, col = list(Group = GroupVec))
    
        ComplexHeatmap::ht_opt(
            legend_border = "black",
            heatmap_border = TRUE,
            annotation_border = FALSE
        )
        
        hmTitle <- unlist(strsplit(names(HmDisplayCatsFromDb)[k], "_padj_"))
        if (length(hmTitle) == 2){
            hmTitle <- paste0("padj_", hmTitle[2])
        } else {
            hmTitle <- names(HmDisplayCatsFromDb)[k]
        }

        rowFontSize <- 10
        if (nrow(mHmBase) > 100){
            rowFontSize <- 2
        } else if (nrow(mHmBase) > 50){
            rowFontSize <- 6
        } else if (nrow(mHmBase) > 20){
            rowFontSize <- 8
        }

        HMplotList[[names(HmDisplayCatsFromDb)[k]]] = ComplexHeatmap::Heatmap(
            mHmBase,
            column_title = gsub(
                    "_", 
                    " ", 
                    hmTitle
            ),
            name = paste0("HM_", k), 
            #row_km = 5,
            col = f1,
           
            show_column_names = T,
            show_row_names = showRowNames,
            ## row text size
            row_names_gp = grid::gpar(fontsize = rowFontSize),
            border = TRUE,
            
            #Dendrogram configurations: columns
            clustering_distance_columns="euclidean",
            clustering_method_columns="complete",
            column_dend_height=unit(10,"mm"),
            
            #Dendrogram configurations: rows
            clustering_distance_rows="euclidean",
            clustering_method_rows="complete",
            row_dend_width=unit(10,"mm"),
            top_annotation = ha,
            show_heatmap_legend = TRUE
            #row_title = NULL,
            #show_row_dend = FALSE
        ) 
        
    ComplexHeatmap::ht_opt(RESET = TRUE)
        
    if (! is.null(HmDisplayCatsFromDb[[k]]$cat_id)){
        link <- paste0(
            'An interactive version of this heatmap with an option for further filtering can be found <a href="',
            "https://biologic.crick.ac.uk/",
            project_id,"/category-view/",
            HmDisplayCatsFromDb[[k]]$cat_id,'" target="_blank">here</a>.'
        )
        
    } else {
        link <- ""
    }
    
    ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0("Heatmap.", names(HmDisplayCatsFromDb)[k],VersionPdfExt)
    FN <- paste0(reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(HMplotList[[names(HmDisplayCatsFromDb)[k]]])
    dev.off()
    ##                                                                       ##
    ###########################################################################
    
    figCap <- paste0(
    '**Figure ',
    figureCount,
    ':** Heatmap showing the gene category ', gsub('_', ' ', names(HmDisplayCatsFromDb)[k]), '. ',
        'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. ',
        link
    )
    
    figureCount <- figureCount + 1 
    
    NewChnk <- paste0(
            "## HM_", names(HmDisplayCatsFromDb)[k],
            "\n```{r, results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
            "\n",
            "\n print(HMplotList[['",names(HmDisplayCatsFromDb)[k],"']])",
            "\n cat(  '\n')",
            "\n\n\n```\n"   
    )
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
    } ## End making heatmap 
    
## Done making heatmaps                                                      ##
###############################################################################
}
## End heatmap plotting loop
## Done                                                                      ##
###############################################################################
if (length(HMplotList) > 2){
    tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
    tabVar <- ".tabset .tabset-fade .tabset-pills"
}

Heatmaps

HM_Experiment_bulkliverdemo_500_most_variable_genes

Figure 8: Heatmap showing the gene category Experiment bulkliverdemo 500 most variable genes. Download a pdf of this figure here. An interactive version of this heatmap with an option for further filtering can be found here.

HM_Experiment_bulkliverdemo_contrast_1_padj_Colon_T_vs_Colon_N_smaller_than_0_01

Figure 9: Heatmap showing the gene category Experiment bulkliverdemo contrast 1 padj Colon T vs Colon N smaller than 0 01. Download a pdf of this figure here. An interactive version of this heatmap with an option for further filtering can be found here.

HM_Experiment_bulkliverdemo_contrast_2_padj_Kidney_T_vs_Kidney_N_smaller_than_0_01

Figure 10: Heatmap showing the gene category Experiment bulkliverdemo contrast 2 padj Kidney T vs Kidney N smaller than 0 01. Download a pdf of this figure here. An interactive version of this heatmap with an option for further filtering can be found here.

HM_Experiment_bulkliverdemo_contrast_3_padj_Liver_T_vs_Liver_N_smaller_than_0_05

Figure 11: Heatmap showing the gene category Experiment bulkliverdemo contrast 3 padj Liver T vs Liver N smaller than 0 05. Download a pdf of this figure here. An interactive version of this heatmap with an option for further filtering can be found here.

HM_Experiment_bulkliverdemo_contrast_4_padj_Lung_T_vs_Lung_N_smaller_than_0_05

Figure 12: Heatmap showing the gene category Experiment bulkliverdemo contrast 4 padj Lung T vs Lung N smaller than 0 05. Download a pdf of this figure here. An interactive version of this heatmap with an option for further filtering can be found here.

HM_Experiment_bulkliverdemo_contrast_5_padj_Skin_T_vs_Skin_N_smaller_than_0_05

Figure 13: Heatmap showing the gene category Experiment bulkliverdemo contrast 5 padj Skin T vs Skin N smaller than 0 05. Download a pdf of this figure here. An interactive version of this heatmap with an option for further filtering can be found here.

HM_Experiment_bulkliverdemo_contrast_6_padj_panTumor_vs_panNormal_smaller_than_0_01

Figure 14: Heatmap showing the gene category Experiment bulkliverdemo contrast 6 padj panTumor vs panNormal smaller than 0 01. Download a pdf of this figure here. An interactive version of this heatmap with an option for further filtering can be found here.

HM_TFs_in_most_variable_genes

Figure 15: Heatmap showing the gene category TFs in most variable genes. Download a pdf of this figure here.

HM_Ligands_in_most_variable_genes

Figure 16: Heatmap showing the gene category Ligands in most variable genes. Download a pdf of this figure here.

HM_Receptors_in_most_variable_genes

Figure 17: Heatmap showing the gene category Receptors in most variable genes. Download a pdf of this figure here.

HM_GO_Kinases

Figure 18: Heatmap showing the gene category GO Kinases. Download a pdf of this figure here.

## Make heatmap gene list
logFCselections <- names(dfMainData)[grep("_logFC_", names(dfMainData))]
padjSelections <- gsub("_logFC_", "_padj_", logFCselections)

dfSelections <- data.frame(logFCselections, padjSelections)
dfSelections <- dfSelections[dfSelections[,"padjSelections"] %in% names(dfMainData),]

if (!exists("project_id")){
    project_id <- gsub("_designTable", "", designTB)
}

if (!exists("labname")){
    labname <- "TBD"
}

if (!exists("reportFigDir") || is.null(reportFigDir)){
    reportFigDir <- ""
}

if (!exists("VersionPdfExt")){
    VersionPdfExt <- paste0(".V", gsub("-", "", Sys.Date()), ".pdf")
}


if (nrow(dfSelections) > 2){
    tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
    tabVar <- ".tabset .tabset-fade .tabset-pills"
}
if (!exists("sdCutOff")){
  sdCutOff <- 2  
}

chnkVec <- as.vector(NULL, mode = "character")

MAplotList <- list()
VplotList <- list()

## Create dfMAplots ##
contrastSel <- c(
  names(dfMainData)[grep("contrast_[0-9]{1,2}", names(dfMainData))],
  names(dfMainData)[grep("contrast_D[0-9]{1,2}", names(dfMainData))]
)
MAselVec <- c(
    contrastSel[grep("lg2BaseMean", contrastSel)],
    contrastSel[grep("logFC", contrastSel)]
)

VolcanoSelVec <- c(
    contrastSel[grep("logFC", contrastSel)],
    contrastSel[grep("lg10p", contrastSel)]
)

contrastVec <- as.vector(sapply(
    contrastSel[grep("logFC", contrastSel)],
    function(x) unlist(strsplit(x, "logFC_"))[2]
))


###############################################################################
## Make MA plot function                                                     ##


makeMAplot <- function(
    dfPlotData,
    geneIDcolumn,
    topNgenes = 5,
    dotsize = 1,
    legendDotSize = 5,
    sdCutOff = 1
){
    headline <- names(dfPlotData)[grep("logFC", names(dfPlotData))]
    headline <- unlist(strsplit(headline, "logFC_"))[2]
    
    names(dfPlotData) <- gsub("contrast_[0-9]{1,2}_", "", names(dfPlotData))
    
    logFCcolName <- names(dfPlotData)[grep("logFC", names(dfPlotData))]
    padjColName <- names(dfPlotData)[grep("padj", names(dfPlotData))]
    lg2BaseMeanColName <- names(dfPlotData)[grep("lg2BaseMean", names(dfPlotData))]
    
    
    ## Now let's get these data columns out of the main data table.
    dfPlotData <- dfPlotData[dfPlotData[,lg2BaseMeanColName] > 0, ]
    
    
    ## For plotting we are using the R-package ggplot. This is a widely used, comprehensive package to make beautiful plots. More information on that here: https://ggplot2.tidyverse.org/
    
    library(ggplot2)
    
    ## Let's add an example for custom coloring here. We are going to highlight the most variable genes in this scatterplot. To do that, we need to add a color column to the plot data dataframe.
    
    ## Now let's color by significantly up-regulated genes in red, and significantly downregulated genes in blue
    
    dfPlotData[["color"]] <- "NS"
    dfPlotData[dfPlotData[, logFCcolName] > 0 & dfPlotData[, padjColName] < 0.05, "color"] <-  "Up"
    
    dfPlotData[dfPlotData[, logFCcolName] < 0 & dfPlotData[, padjColName] < 0.05, "color"] <-  "Down"
    
    ## Re-order dfPlotData for better results
    
    ## Let's have a look at the color vector
    
    
    colorVec <- c("blue", "red","black")
    
    names(colorVec) <- c("Down", "Up", "NS")
    
    
    ## And here is the resulting color vector
    colorVec <- colorVec[names(colorVec) %in% dfPlotData$color]
    
    dfPlotData$color <- factor(dfPlotData$color, levels = names(colorVec))
    dfPlotData <- dfPlotData[order(dfPlotData$color, decreasing = F), ]
    
    ## Now let's also add a label for the 10 most significantly up- and down-regulated genes.This number can be changed in the variable Nsel. Here we use the R package ggrepel.
    
    library(ggrepel)
    
    ## Let's order the data frame by log-fold change
    dfPlotData <- dfPlotData[order(dfPlotData[,logFCcolName], decreasing = T), ]
    topGenes <- as.vector(dfPlotData[1:topNgenes,geneIDcolumn])
    
    dfPlotData <- dfPlotData[order(dfPlotData[,logFCcolName], decreasing = F), ]
    bottomGenes <- as.vector(dfPlotData[1:topNgenes,geneIDcolumn])
    
    dfPlotData[["label"]] <- ""
    dfPlotData[dfPlotData[,geneIDcolumn] %in% c(topGenes, bottomGenes), "label"] <- dfPlotData[dfPlotData[,geneIDcolumn] %in% c(topGenes, bottomGenes), geneIDcolumn]
    
    yScaleMax <- max(abs(dfPlotData[,logFCcolName]))
    
    lgFCsel <- sdCutOff * sd(dfPlotData[, logFCcolName])
    ## Now let's first make the MA-plot without lables
    
    plotNoLabels <- ggplot(
        data = dfPlotData, 
        aes_string(x=lg2BaseMeanColName, y=logFCcolName, color = "color", label = "label")
    ) + geom_hline(yintercept = 0, color = "black", size=0.5
    ) + geom_hline(yintercept = c(-1*lgFCsel,lgFCsel), color = "grey", size=0.5, linetype = 2               
    ) + geom_point( shape=16, size = dotsize
    ) + scale_colour_manual(name = "Significant" ,values = colorVec
    ) + theme_bw(
    )  +  theme(
        axis.text.y   = element_text(size=8),
        axis.text.x   = element_text(size=8),
        axis.title.y  = element_text(size=8),
        axis.title.x  = element_text(size=8),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        plot.title = element_text(hjust = 0.5, size = 12)
    ) + ylim(-1*yScaleMax, yScaleMax
    ) + ggtitle(paste0("MA-Plot ", contrastVec[i])            
    ) + xlab(gsub("_", " ", logFCcolName)
    ) + ylab(gsub("_", " ", logFCcolName)            
    ) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
    ) 
         
    
    ## And now let's add the labels:
    plotWithLabels <- plotNoLabels + geom_text_repel(size = 3)
    
    return(plotWithLabels)  
}

## End Make MA plot function                                                 ##
###############################################################################

###############################################################################
## Make Volcanoplot                                                          ##
makeVolcanoPlot <- function(
    dfPlotData,
    geneIDcolumn,
    topNgenes = 5,
    dotsize = 1,
    legendDotSize = 5,
    sdCutOff = 1
){
    headline <- names(dfPlotData)[grep("logFC", names(dfPlotData))]
    headline <- unlist(strsplit(headline, "logFC_"))[2]
    
    names(dfPlotData) <- gsub("contrast_[0-9]{1,2}_", "", names(dfPlotData))
    
    logFCcolName <- names(dfPlotData)[grep("logFC", names(dfPlotData))]
    lg10pColName <- names(dfPlotData)[grep("lg10p", names(dfPlotData))]
    padjColName <- names(dfPlotData)[grep("padj", names(dfPlotData))]
    
    ## Now let's get these data columns out of the main data table.
    dfPlotData <- dfPlotData[dfPlotData[,logFCcolName] != 0, ]
    
    ## Determine logFC cut-off for the Volcano Plot ##
    lgFCsel <- sdCutOff * sd(dfPlotData[, logFCcolName])
    
    dfPlotData[["color"]] <- "NS"
    dfPlotData[dfPlotData[, logFCcolName] > lgFCsel & dfPlotData[, padjColName] < 0.05, "color"] <-  "Up"
    
    dfPlotData[dfPlotData[, logFCcolName] < -1*lgFCsel & dfPlotData[, padjColName] < 0.05, "color"] <-  "Down"
    
    ## Re-order dfPlotData for better results
    
    ## Let's have a look at the color vector
    
    
    colorVec <- c("blue", "red","black")
    
    names(colorVec) <- c("Down", "Up", "NS")
    
    
    ## And here is the resulting color vector
    colorVec <- colorVec[names(colorVec) %in% dfPlotData$color]
    
    dfPlotData$color <- factor(dfPlotData$color, levels = names(colorVec))
    dfPlotData <- dfPlotData[order(dfPlotData$color, decreasing = F), ]
    
    ## And here is the resulting color vector
    colorVec <- colorVec[names(colorVec) %in% dfPlotData$color]
    
    dfPlotData$color <- factor(dfPlotData$color, levels = names(colorVec))
    dfPlotData <- dfPlotData[order(dfPlotData$color, decreasing = F), ]
    
    ## Now let's also add a label for the 10 most significantly up- and down-regulated genes.This number can be changed in the variable Nsel. Here we use the R package ggrepel.
    
    library(ggrepel)
    
    ## Let's order the data frame by log-fold change
    dfPlotData <- dfPlotData[order(dfPlotData[,logFCcolName], decreasing = T), ]
    topGenes <- as.vector(dfPlotData[1:topNgenes,geneIDcolumn])
    
    dfPlotData <- dfPlotData[order(dfPlotData[,logFCcolName], decreasing = F), ]
    bottomGenes <- as.vector(dfPlotData[1:topNgenes,geneIDcolumn])
    
    dfPlotData[["label"]] <- ""
    dfPlotData[dfPlotData[,geneIDcolumn] %in% c(topGenes, bottomGenes), "label"] <- dfPlotData[dfPlotData[,geneIDcolumn] %in% c(topGenes, bottomGenes), geneIDcolumn]
    
xMaxVal <- max(abs(dfPlotData[,logFCcolName]))
    
pVolcano <- ggplot(
        data = dfPlotData, 
        aes_string(x=logFCcolName, y=lg10pColName, color = "color",label = "label")
    ) + geom_hline(yintercept = 0, color = "black", size=0.5
    ) + geom_hline(yintercept = -1*log10(0.05), color = "grey", size=0.5, linetype = 2
    ) + geom_vline(xintercept = 0, color = "black", size=0.5
    ) + geom_vline(xintercept = c(-1*lgFCsel,lgFCsel), color = "grey", size=0.5, linetype = 2      ) + geom_point( shape=16, size = dotsize
    ) + scale_colour_manual(name = "Variability" ,values = colorVec
    
    ) + theme_bw(
    )  +  theme(
        axis.text.y   = element_text(size=8),
        axis.text.x   = element_text(size=8),
        axis.title.y  = element_text(size=8),
        axis.title.x  = element_text(size=8),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        plot.title = element_text(hjust = 0.5, size = 12)
    ) + xlim(-1*xMaxVal,xMaxVal
    ) + ggtitle(paste0("Volcano Plot ", contrastVec[i]) 
    ) + xlab(gsub("_", " ", logFCcolName)
    ) + ylab(gsub("_", " ", lg10pColName)            
    ) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
    ) 

    plotVolcanoWithLabels <- pVolcano + geom_text_repel(size = 3)

return(plotVolcanoWithLabels)
}
## Done Volcanoplot                                                          ##
###############################################################################

for (i in 1:length(contrastVec)){
    ## Make MA-plot ##
    contrastVec <- as.vector(sapply(
    contrastSel[grep("logFC", contrastSel)],
    function(x) unlist(strsplit(x, "logFC_"))[2]
))
    
    selVec <- c(
        geneIDcolumn,
        names(dfMainData)[grep(paste0("lg2BaseMean_", contrastVec[i], "$"), names(dfMainData))],
        names(dfMainData)[grep(paste0("logFC_", contrastVec[i], "$"), names(dfMainData))],
        names(dfMainData)[grep(paste0("padj_", contrastVec[i], "$"), names(dfMainData))],
        names(dfMainData)[grep(paste0("lg10p_", contrastVec[i], "$"), names(dfMainData))]
    )
    
    dfPlotData <- unique(dfMainData[,selVec])
    
    tagMA <- paste0("MA_", contrastVec[i])
    
    MAplotList[[tagMA]] <- makeMAplot(
        dfPlotData = dfPlotData,
        geneIDcolumn = geneIDcolumn,
        topNgenes = 5,
        dotsize = 1,
        legendDotSize = 5,
        sdCutOff = sdCutOff
    )
    
    ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0(contrastVec[i], ".MA.plot", VersionPdfExt)
    FN <- paste0(reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(MAplotList[[tagMA]])
    dev.off()
    ##                                                                       ##
    ###########################################################################
    
    selLg2BM <- selVec[grep("lg2BaseMean_", names(dfPlotData))]
    selLogFC <- selVec[grep("_logFC_", names(dfPlotData))]
    
     
    xAxis <- selLg2BM[grep(contrastVec[i], selLg2BM)]
    yAxis <- selLogFC[grep(contrastVec[i], selLogFC)]
    
    link1 <- paste0('<a href="https://biologic.crick.ac.uk/',project_id,'/scatterplot?x_axis=',xAxis,'&y_axis=',yAxis,'&cat_id=ag_lab_categories__10" target="_blank">here</a>.')
    
    
    figCap <- paste0(
        '**Figure ',
        figureCount,
        'A:** Volcano and MA-plot Plot ',gsub('MA_', '', tagMA),'. ',
        'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. ',
        'An interactive version of this plot can be found ', link1
    )
 
    figureCount <- figureCount + 1
   
    NewChnk <- paste0(
            "## MA-Plot ",contrastVec[i],
            "\n```{r ",contrastVec[i],", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
            "\n",
            "\n print(MAplotList[['",tagMA,"']])",
            "\n cat(  '\n')",
            "\n\n\n```\n"   
    )

    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
    
    ## Now the Volcano Plot ##
    tagV <- paste0("Volcano_", contrastVec[i])
    
    VplotList[[tagV]] <- makeVolcanoPlot(
        dfPlotData,
        geneIDcolumn,
        topNgenes = 5,
        dotsize = 1,
        legendDotSize = 5,
        sdCutOff = sdCutOff
    )
    
    ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0(contrastVec[i], ".Volcano.plot", VersionPdfExt)
    FN <- paste0(reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(VplotList[[tagV]])
    dev.off()
    ##                                                                       ##
    ###########################################################################
    
    selLg10p <- selVec[grep("_lg10p_", names(dfPlotData))]
    selLogFC <- selVec[grep("_logFC_", names(dfPlotData))]
    
   
    xAxis <- selLogFC[grep(contrastVec[i], selLogFC)]
    yAxis <- selLg10p[grep(contrastVec[i], selLg10p)]
    
    link2 <- paste0('<a href="https://biologic.crick.ac.uk/',project_id,'/scatterplot?x_axis=',xAxis,'&y_axis=',yAxis,'&cat_id=ag_lab_categories__10" target="_blank">here</a>.')
    
    
    figCap <- paste0(
        '**Figure ',
        figureCount,
        'B:** Volcanoplot ',contrastVec[i],'. ',
        'Download a pdf of this figure <a href="',FNrel,'" target = "_blank">here</a>. ',
        'An interactive version of this plot can be found ' , link2, '.'
    )
 
    figureCount <- figureCount + 1
   
    NewChnk <- paste0(
            "\n```{r V_",contrastVec[i],", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
            "\n",
            "\n print(VplotList[['",tagV,"']])",
            "\n cat(  '\n')",
            "\n\n\n```\n"   
    )

    chnkVec <- c(
        chnkVec,
        NewChnk
    )
}

if (length(contrastVec) > 2){
    tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
    tabVar <- ".tabset .tabset-fade .tabset-pills"
}

Differential Gene Expression Analysis (DGE)

MA-Plot Colon_T_vs_Colon_N

Figure 19A: Volcano and MA-plot Plot Colon_T_vs_Colon_N. Download a pdf of this figure here. An interactive version of this plot can be found here.

Figure 20B: Volcanoplot Colon_T_vs_Colon_N. Download a pdf of this figure here. An interactive version of this plot can be found here..

MA-Plot Kidney_T_vs_Kidney_N

Figure 21A: Volcano and MA-plot Plot Kidney_T_vs_Kidney_N. Download a pdf of this figure here. An interactive version of this plot can be found here.

Figure 22B: Volcanoplot Kidney_T_vs_Kidney_N. Download a pdf of this figure here. An interactive version of this plot can be found here..

MA-Plot Liver_T_vs_Liver_N

Figure 23A: Volcano and MA-plot Plot Liver_T_vs_Liver_N. Download a pdf of this figure here. An interactive version of this plot can be found here.

Figure 24B: Volcanoplot Liver_T_vs_Liver_N. Download a pdf of this figure here. An interactive version of this plot can be found here..

MA-Plot Lung_T_vs_Lung_N

Figure 25A: Volcano and MA-plot Plot Lung_T_vs_Lung_N. Download a pdf of this figure here. An interactive version of this plot can be found here.

Figure 26B: Volcanoplot Lung_T_vs_Lung_N. Download a pdf of this figure here. An interactive version of this plot can be found here..

MA-Plot Skin_T_vs_Skin_N

Figure 27A: Volcano and MA-plot Plot Skin_T_vs_Skin_N. Download a pdf of this figure here. An interactive version of this plot can be found here.

Figure 28B: Volcanoplot Skin_T_vs_Skin_N. Download a pdf of this figure here. An interactive version of this plot can be found here..

MA-Plot panTumor_vs_panNormal

Figure 29A: Volcano and MA-plot Plot panTumor_vs_panNormal. Download a pdf of this figure here. An interactive version of this plot can be found here.

Figure 30B: Volcanoplot panTumor_vs_panNormal. Download a pdf of this figure here. An interactive version of this plot can be found here..

if (!exists("sdCutOff")){
  sdCutOff <- 2  
}


## Create enriched genes list ##
EnrichedGenesList <- list()

contrastSel <- c(
    names(dfMainData)[grep("contrast_[0-9]{1,2}", names(dfMainData))],
    names(dfMainData)[grep("contrast_D[0-9]{1,2}", names(dfMainData))]
)

DGEtagVec <- as.vector(sapply(
    contrastSel[grep("logFC", contrastSel)],
    function(x) unlist(strsplit(x, "logFC_"))[2]
))

selVec <- c(
    geneIDcolumn,
    contrastSel
)

dfAllPlots <- dfMainData[,selVec]

if (geneIDcolumn != "mgi_symbol" & geneIDcolumn != "hgnc_symbol") {
    queryGS <- "hgnc_symbol" 
} else {
    queryGS <- Obio@parameterList$geneIDcolumn
}

for (i in 1:length(DGEtagVec)){
    tag <- paste0("Enrichments_HG_", DGEtagVec[i])  
    tagGLpos <- paste0(DGEtagVec[i], "_pos") 
    tagGLneg <- paste0(DGEtagVec[i], "_neg") 
    
    selVec <- c(
        geneIDcolumn,
        names(dfAllPlots)[grep(paste0("lg2BaseMean_", DGEtagVec[i],"$"), names(dfAllPlots))],
        names(dfAllPlots)[grep(paste0("logFC_",DGEtagVec[i],"$"), names(dfAllPlots))],
        names(dfAllPlots)[grep(paste0("padj_",DGEtagVec[i],"$"), names(dfAllPlots))],
        names(dfAllPlots)[grep(paste0("lg10p_",DGEtagVec[i],"$"), names(dfAllPlots))]
    )
    
    dfPlot <- dfAllPlots[,selVec]
    pos <- grep("included", names(dfPlot))
    if (length(pos) == 0){
        dfPlot[["included"]] <- "+"
    }
    
    lgFCsel <- sdCutOff * sd(dfPlot[,grep("_logFC_", names(dfPlot))])
    
    
    dfPlot[["DGE_Status"]] <- "Unchanged"
    dfPlot[dfPlot[,grep("_logFC_", names(dfPlot))] > lgFCsel & dfPlot[,grep("_padj_", names(dfPlot))] < 0.05, "DGE_Status"] <- "Up"
    EnrichedGenesList[[tagGLpos]] <- unique(dfPlot[dfPlot$DGE_Status == "Up", geneIDcolumn])
    
    dfPlot[dfPlot[,grep("_logFC_", names(dfPlot))] < -1 *lgFCsel & dfPlot[,grep("_padj_", names(dfPlot))] < 0.05, "DGE_Status"] <- "Down"
    EnrichedGenesList[[tagGLneg]] <- unique(dfPlot[dfPlot$DGE_Status == "Down", geneIDcolumn])
    print(i)
} 

library(knitr)
library(ggplot2)

#save.image("temp.RData")
library(clusterProfiler)

    gmtList <- list()
     dbtableList <- list(
          # "GO-MF" = "mysigdb_c5_MF",
          "Pathways" = "mysigdb_c2_1329_canonical_pathways",
          "HallMarks" = "mysigdb_h_hallmarks"
      )
    
    
    
   
    
    for (i in 1:length(dbtableList)){
        
        dfTemp <- unique(import.db.table.from.db(
            host = Obio@dbDetailList$host,
            dbname = Obio@dbDetailList$ref.cat.db,
            dbtable = dbtableList[[i]],
            password = db.pwd,
            user = Obio@dbDetailList$db.user
        ))
        
        ## Remove duplicated entries ##
        dfTemp <- dfTemp[!(duplicated(dfTemp$cat_name)),]
        
        rmVec <- grep("temp_", dfTemp$cat_type)
        if (length(rmVec) > 0){
            dfTemp <- dfTemp[-rmVec, ]
        }
        
        dfTemp <- unique(dbcat2gmt(
            df.cat = dfTemp, # As downloaded from reference_categories_db_new database
            gene.id.column = queryGS
        ))
        
        dfTemp <- unique(dfTemp[!duplicated(as.vector(dfTemp[,1])), ])
        
        write.table(
            dfTemp,
            "temp.gmt.txt",
            row.names = F, 
            sep = "\t",
            col.names = F,
            quote = F
        )
        
        CPgmt <- read.gmt("temp.gmt.txt")
        unlink("temp.gmt.txt")
        CPgmt <- unique(CPgmt[CPgmt$gene != "", ])
        
        gmtList[[dbtableList[[i]]]] <- CPgmt
    }
    
    ## Edit collection names for plot
    names(gmtList) <- gsub("mysigdb_h_hallmarks", "HallMarkCats",names(gmtList))
    names(gmtList) <- gsub("mysigdb_", "",names(gmtList))
    names(gmtList) <- gsub("c2_1329_canonical_p", "P",names(gmtList))
    names(gmtList) <- gsub("sc_sig", "CellSig",names(gmtList))
    names(gmtList) <- gsub("cibersort_L22", "CellSig",names(gmtList))
    names(gmtList) <- gsub("c5_", "GO_",names(gmtList))
    names(gmtList) <- gsub("networkcategories", "Complexes",names(gmtList))
    
    ## Done creating gmt list
    ###########################
    
    ## Select colors ##
    library(scales)
    enrCols <- hue_pal()(length(gmtList))
    names(enrCols) <- substr(names(gmtList),1,10)



plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")



for (j in 1:length(DGEtagVec)){
    posTestGeneSet <- as.vector(
        unique(
            EnrichedGenesList[[paste0(DGEtagVec[j], "_pos")]]
        )
    )
    
    
    negTestGeneSet <- as.vector(
        unique(
            EnrichedGenesList[[paste0(DGEtagVec[j], "_neg")]]
        )
    )
   
    
    ###########################################################################
    ## Create GMT file for category enrichment                               ##
    
    ###########################
    ## Create gmt list
    ## Retrieve gmt files from database
    ## Add custom gmt files
    
    
    
    
    ## Done                                                                  ##
    ###########################################################################
     
    library(clusterProfiler)
    library(ggplot2)
    library(tidyr)
        
        if (geneIDcolumn != "mgi_symbol" & geneIDcolumn != "hgnc_symbol") {
            queryGS <- "hgnc_symbol" 
        } else {
            queryGS <- geneIDcolumn
        }
        
        if (Obio@dbDetailList$host == "10.27.241.234"){
            urlString <- "biologic.thecrick.org"
        } else {
            urlString <- "biologic.crick.ac.uk"
        }
    
    colVec <- c("red", "blue")
    pvalueCutoff <- 0.5
    topMaxCat <- 10
    
    ## Get background gene set ##
    #backgroundGeneVec <- row.names(OsC[["RNA"]]@counts)
    if ((length(posTestGeneSet) >= 3) | (length(negTestGeneSet) >= 3)){
        ## Do enrichment ##
        first <- TRUE
        if (length(posTestGeneSet) >= 3){
            for (k in 1:length(gmtList)){
                    egmt <- data.frame(
                        enricher(
                            negTestGeneSet, 
                            TERM2GENE=gmtList[[k]],
                            pvalueCutoff = pvalueCutoff
                        )
                    )
                    if (!is.null(egmt)){
                        if (nrow(egmt) > 0){
                            egmt[["Collection"]] <- substr(names(gmtList)[k], 1,10)
                        }
                        if (first){
                            dfTempEnriched <- egmt    
                            first <- FALSE
                        } else {
                            dfTempEnriched <- rbind(
                                dfTempEnriched, 
                                egmt
                            )    
                        }
                        
                    }
            }
            if (nrow(dfTempEnriched) > 0){
                dfTempEnriched[["direction"]] <- "positive"
                dfTempEnriched[["log10FDR"]] <- log10(dfTempEnriched$p.adjust)
                dfTempEnriched <- dfTempEnriched[order(dfTempEnriched$log10FDR, decreasing = F),]
                dfTempEnriched <- na.omit(dfTempEnriched)
                
                if (nrow(dfTempEnriched) > topMaxCat){
                    dfTempEnriched <- dfTempEnriched[1:topMaxCat, ]
                }
            }
          
            
        } # end positive
            
            ## Now the negative side ##
            if (length(negTestGeneSet) >= 3){
            first <- TRUE
            for (k in 1:length(gmtList)){
                    egmt <- data.frame(
                        enricher(
                            posTestGeneSet, 
                            TERM2GENE=gmtList[[k]],
                            pvalueCutoff = pvalueCutoff
                        )
                    )
                    if (!is.null(egmt)){
                        if (nrow(egmt) > 0){
                            egmt[["Collection"]] <- substr(names(gmtList)[k], 1,10)
                        }
                        if (first){
                            dfTempEnrichedNeg <- egmt    
                            first <- FALSE
                        } else {
                            dfTempEnrichedNeg <- rbind(
                                dfTempEnrichedNeg, 
                                egmt
                            )    
                        }
                        
                    } 
            }
            if (nrow(dfTempEnrichedNeg) > 0){
                dfTempEnrichedNeg[["direction"]] <- "negative"
                dfTempEnrichedNeg[["log10FDR"]] <- -1*log10(dfTempEnrichedNeg$p.adjust)
                dfTempEnrichedNeg <- dfTempEnrichedNeg[order(dfTempEnrichedNeg$log10FDR, decreasing = T),]
                dfTempEnrichedNeg <- na.omit(dfTempEnrichedNeg)
                
                if (nrow(dfTempEnrichedNeg) > topMaxCat){
                    dfTempEnrichedNeg <- dfTempEnrichedNeg[1:topMaxCat, ]
                }
            }
            } # end negative
        
            
            
            ## Make plot 
            if (!exists("dfTempEnriched")){
                dfTempEnriched <- data.frame(
                    ID = character(0),
                    Term = character(0),
                    p.adjust = numeric(0),
                    pvalue = numeric(0),
                    qvalue = numeric(0),
                    geneID = character(0),
                    Count = numeric(0),
                    Collection = character(0),
                    direction = character(0),
                    log10FDR = numeric(0)
                )
            }

            if (!exists("dfTempEnrichedNeg")){
                dfTempEnrichedNeg <- data.frame(
                    ID = character(0),
                    Term = character(0),
                    p.adjust = numeric(0),
                    pvalue = numeric(0),
                    qvalue = numeric(0),
                    geneID = character(0),
                    Count = numeric(0),
                    Collection = character(0),
                    direction = character(0),
                    log10FDR = numeric(0)
                )
            }

            if ((nrow(dfTempEnriched) > 0) | (nrow(dfTempEnrichedNeg) > 0)){
            
            dfSel <- rbind(
                dfTempEnriched,
                dfTempEnrichedNeg
            )
            
            dfSel <- na.omit(dfSel)
            dfSel <- dfSel[order(dfSel$log10FDR),]
            dfSel$log10FDR <- round(dfSel$log10FDR, 2)
            
            dfSel[["Category"]] <- ""
            dfSel[dfSel$log10FDR >= 0, "Category"] <- "Enr."
            dfSel[dfSel$log10FDR < 0, "Category"] <- "Depl."
            
            for (l in 1:nrow(dfSel)){
                if (nchar(dfSel[l, "ID"]) > 30){
                    part1 <- substr(dfSel[l, "ID"], 1, 30)
                    part2 <- substr(dfSel[l, "ID"], 31, 60)
                    dfSel[l, "ID"] <- paste0(part1, " \\n", part2)
                  
                }
            }
            
            
            #dfSel$Term <- gsub("\\(GO", "\\\n\\(GO", dfSel$Term)
            
            dfSel$ID <- factor(dfSel$ID, levels = unique(dfSel$ID))
            
            
            
            plotList[[paste0("PCA_ENR_", j)]] <- ggplot(
                data=dfSel, aes(x= ID, y=log10FDR, fill=Collection, order=log10FDR)
            ) + geom_bar(stat="identity", colour="black"
            ) + coord_flip() +scale_fill_manual(values=enrCols
            ) + theme_bw(
            )  +  theme(
                axis.text.y   = element_text(size=8),
                axis.text.x   = element_text(size=8),
                axis.title.y  = element_text(size=8),
                axis.title.x  = element_text(size=8),
                axis.line = element_line(colour = "black"),
                panel.border = element_rect(colour = "black", fill=NA, size=1),
                plot.title = element_text(hjust = 0.5, size = 12)
            )  + labs(title = paste0("Comparison ", DGEtagVec[j]," enriched genes") ,y = "-log10(FDR)", x = ""
            ) + geom_hline(yintercept = c(-log10(0.05), log10(0.05)), color = "grey", size=0.5, lty=2
            ) + geom_hline(yintercept = 0, color = "black", size=0.5
            ) 
            cat("  \n")
            
            
            
            ## Save to file ##
            FNbase <- paste0("DGE_comparison_", DGEtagVec[j],".enriched.genes", VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)
            
           
            pdf(FN)
            print(plotList[[paste0("PCA_ENR_", j)]])
            dev.off()
            
            link <- paste0(
                '<a href="https://', urlString, '/',
                Obio@parameterList$project_id,
                '/category-view?category_type=GO-BP" target="_blank">CategoryView</a>'
            )
            
            ## Create R markdown chunk ##
            figLegend <- paste0(
                '**Figure ', 
                figureCount, 
                '**: Category enrichment analysis for the top genes that have  <font color = "',colVec[2],'"> the most positive </font> and <font color = "',colVec[1],'">the most negative</font> PCA loading values in dimension ', 
               DGEtagVec[j],
                ' associated with them. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. To view these gene sets in the context of your data, go to ',link,' and find these categories using the search box.'
            )
            figureCount <- figureCount + 1 
            
            NewChnk <- paste0(
                "## ", DGEtagVec[j],
                "\n```{r enrichr_",
                j,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",paste0("PCA_ENR_", j),"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"   
            )
            
            chnkVec <- c(
                chnkVec,
                NewChnk
            )
        }
            
            
            ## done with plot 
            
    } ## Done with per dimension loops
}        
      
 

if (length(plotList) > 3){
    tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
    tabVar <- ".tabset .tabset-fade .tabset-pills"
}

Category Enrichments - Hypergeometric Test

###############################################################################
## Do category enrichment on clusters                                        ##
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

Colon_T_vs_Colon_N

Figure 31: Category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension Colon_T_vs_Colon_N associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.

Kidney_T_vs_Kidney_N

Figure 32: Category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension Kidney_T_vs_Kidney_N associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.

Liver_T_vs_Liver_N

Figure 33: Category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension Liver_T_vs_Liver_N associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.

Lung_T_vs_Lung_N

Figure 34: Category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension Lung_T_vs_Lung_N associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.

Skin_T_vs_Skin_N

Figure 35: Category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension Skin_T_vs_Skin_N associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.

panTumor_vs_panNormal

Figure 36: Category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension panTumor_vs_panNormal associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.

## Done doing enrichment on clusters                                         ##
###############################################################################

Gene Set Enrichment Analysis (GSEA)

if (!exists("project_id")){
    project_id <- gsub("_designTable", "", designTB)
}

if (!exists("labname")){
    labname <- "TBD"
}

if (!exists("reportFigDir") || is.null(reportFigDir)){
    reportFigDir <- ""
}

if (!exists("reportTableDir") || is.null(reportTableDir)){
    reportTableDir <- ""
}

if (!exists("VersionPdfExt")){
    VersionPdfExt <- paste0(".V", gsub("-", "", Sys.Date()), ".pdf")
}

Find furter background information on the Gene Set Enrichment Analysis (GSEA) and the interpretation of results can be found here. Here the improved fgsea algorithm will be used to calculate enrichment scores.

if (!exists("VersionPdfExt")){
    VersionPdfExt <- paste0(".V", gsub("-", "", Sys.Date()), ".pdf")
}

if (!exists("Obio") || is.null(Obio@parameterList$workdir)){
    workdir <- getwd()
}

chnkVec <- as.vector(NULL, mode = "character")
plotList <- list()

plotListER <- list()
chnkVecER <- list()


###############################################################################
## Prepare GMT file                                                          ##

## Option A - from database ##
if (exists("Obio") && !is.null(Obio@parameterList$GSEAtables)){
  tables <- Obio@parameterList$GSEAtables
  print(
      paste0(
        "The following gene sets have been used in the GSEA analysis: ",   
        sort(paste0(names(Obio@referenceTableList), collapse = ",")), 
        "."
      )
    )
} else {
  tables <- c(
    "mysigdb_h_hallmarks",
    "mysigdb_c5_BP" #,
    #Obio@parameterList$lab.categories.table
  )
}


# #
dfRefGmt <- create.gmt.file.from.ref.data.table(
     host = Obio@dbDetailList$host,
     dbname = "reference_categories_db_new",
     dataTable = tables,
     pwd = db.pwd,
     user=Obio@dbDetailList$db.user,
     gene.id.column = "hgnc_symbol"
 )


localGmtDir <- paste0(
    Obio@parameterList$workdir,
    "GSEA/"
)

if (!exists(localGmtDir)){
  dir.create(localGmtDir)
}

#
gmtDir<- paste0(
    Obio@parameterList$workdir,
    "GSEA/"
)

gmtFileName <- paste0(
    project_id,
    ".",
    "projectGmtFile.gmt"
)

dfRefGmt <- dfRefGmt[!(duplicated(dfRefGmt[,1])),]

dfPathwayAnno <- unique(data.frame(cat_id = row.names(dfRefGmt), cat_name = dfRefGmt[,1]), url=dfRefGmt[,2])
dfRefGmt[,2] <- NULL


## transform all columns
empty_as_na <- function(x){
    if("factor" %in% class(x)) x <- as.character(x) ## since ifelse wont work with factors
    ifelse(as.character(x)!="", x, NA)
}

dfRefGmt <- dfRefGmt %>% dplyr::mutate_each(dplyr::funs(empty_as_na))


write.table(
    dfRefGmt,
    paste0(localGmtDir, gmtFileName),
    col.names = FALSE,
    row.names = FALSE,
    sep="\t",
    quote = F
)

## Done creating project gmt. file                                           ##
###############################################################################

## Option B: Load a gmt file created by other means
# FN <- "/Volumes/babs/working/boeings/Projects/goulda/adrien.franchet/472_brains_from_drosophila_larvae_RN21220/workdir/GSEA/RN21220.projectGmtFile.gmt"
# 
# dfRefGmt <- read.delim(
#     FN, 
#     header = F,
#     sep = "\t",
#     stringsAsFactors = F
# )

###############################################################################
## Run fGSEA on all log-fold changes                                         ##

selVec <- c(
    "hgnc_symbol",
    names(dfMainData)[grep(paste0("contrast_[0-9]{1,2}_logFC"), names(dfMainData))],
    names(dfMainData)[grep(paste0("contrast_D[0-9]{1,2}_logFC"), names(dfMainData))]
)

dfGSEAdata <- unique(dfMainData[, selVec])
dfGSEAdata <- na.omit(dfGSEAdata)


if (ncol(dfGSEAdata) > 2){
    dfGSEAdata <- dfGSEAdata[rowSums(dfGSEAdata[,2:ncol(dfGSEAdata)]) != 0,]
} else {
    dfGSEAdata <- dfGSEAdata[dfGSEAdata[,2] != 0,]
}


## Delete old rnk files ##
if (!exists("localGmtDir")){
  localGmtDir <- "GSEA/"
}

unlink(paste0(localGmtDir, "*.rnk"))

biologicSeqTools2::create.gsea.rnk.files(
     workdir,
     df.dataTable = dfGSEAdata,
     GSEA.colum.type = "logFC",
     gene.symbol.column.name = "hgnc_symbol",
     GSEADir = localGmtDir
 )

rnkFileVec <- paste0(localGmtDir,list.files(localGmtDir)[grep(".rnk$", list.files(localGmtDir))])

plotList <- list()
chnkVec <- as.vector(NULL, mode="character")

## Create Excel output ##
fullOutFN <- paste0(project_id, "_GSEA.xlsx")
outFN <- paste0(project_id, "_GSEA.xlsx")
wb <- openxlsx::createWorkbook()

for (i in 1:length(rnkFileVec)){
    logFCcol <- unlist(strsplit(rnkFileVec[i], "GSEA/"))[2]
    logFCcol <- gsub(".rnk", "",logFCcol)
    lg10pCol <- gsub("logFC","lg10p", logFCcol)
    lg2BaseMeanCol <- gsub("logFC", "lg2BaseMean", logFCcol)
    
    tag <- gsub("contrast_[0-9]{1,2}_", "", logFCcol)
    tag <- paste0("GSEA_", tag)
    
    dfRnk <- read.delim(
        rnkFileVec[i],
        header=T, 
        sep = "\t"
    )
    
    GSEAranks <- dfRnk$logFC
    names(GSEAranks) <- dfRnk$hgnc_symbol
    
    pathways <- fgsea::gmtPathways(paste0(localGmtDir, gmtFileName))
    
    set.seed(42)
    fgseaRes <- fgsea::fgsea(
        pathways = pathways, 
        stats    = GSEAranks,
        minSize  = 10,
        maxSize  = 2500
    )
    
    
    ###########################################################################
    ## Make top-bottom 10 plot                                               ##
    N <- 10
    ## Top N up NES ##
    topNup <- as.vector(unlist(fgseaRes[order(fgseaRes$NES, decreasing = T),"pathway"]))[1:N]
    
    ## Top N down NES
    topNdown <- as.vector(unlist(fgseaRes[order(fgseaRes$NES, decreasing = F),"pathway"]))[1:N]
    
    topPathways <- c(topNup,rev(topNdown))
    
    ## Necessary to load fgsea to get the gridExtra package loaded ##
    dfTable <- fgseaRes
    dfTable$pathway <- substr(gsub("_", " ", dfTable$pathway),1,60)
    
    library(fgsea)
    pdf("temp.pdf")
   plotList[[tag]] <- plotGseaTable(
        pathways = pathways[topPathways], 
        stats = GSEAranks, 
        fgseaRes = dfTable, 
        gseaParam=0.5,
        colwidths = c(5, 3, 0, 0, 0), 
        render = FALSE
    ) 
   
   dev.off()
   unlink("temp.pdf")
    
   ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0("GSEAsummary.", tag,VersionPdfExt)
    FN <- paste0(reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(grid::grid.draw(plotList[[tag]]))
        
    dev.off()
    ##                                                                       ##
    ###########################################################################
    
    figCap <- paste0(
    '**Figure ',
    figureCount,
    ':** Top up- and downregulated GSEA gene categories for the log-FC comparison ', tag, '. ',
        'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
    )
    
    figureCount <- figureCount + 1 
    
    NewChnk <- paste0(
            "### ", tag,
            "\n```{r ",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
            "\n",
            "\n print(grid::grid.draw(plotList[['",tag,"']]))",
            "\n cat(  '\n')",
            "\n\n\n```\n"   
    )
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
   
    ## Done                                                                  ##
    ###########################################################################
    
    ###########################################################################
    ## Make category plots 
    ## stump ##
    # library(fgsea)
    # plotListER[[tag]] <- plotEnrichment(pathways[[topNup[1]]],
    #            exampleRanks) + labs(title=gsub("_", " ", topNup[1]))
    ##
    ###########################################################################
    
    fgseaRes <- unique(fgseaRes[,c("pathway", "NES", "padj")])
    #fgseaRes <- fgseaRes[fgseaRes$padj < 0.05,]
    
    dfTempAnno <- dfPathwayAnno[dfPathwayAnno$cat_name %in% fgseaRes$pathway,]
    
    fgseaRes <- merge(
        dfTempAnno, 
        fgseaRes, 
        by.x = "cat_name",
        by.y = "pathway"
    )
    
    fgseaRes[["GSEA"]] <- tag
    
    fgseaRes[["Volcanoplot_Link"]] <- paste0(
        'https://biologic.crick.ac.uk/', 
        Obio@parameterList$project_id, 
        '/scatterplot?x_axis=',
        logFCcol, 
        '&y_axis=',
        lg10pCol,
        '&cat_id=',
        fgseaRes$cat_id
    )
    
    fgseaRes[["MAplot_Link"]] <- paste0(
        'https://biologic.crick.ac.uk/', 
        Obio@parameterList$project_id, 
        '/scatterplot?x_axis=',
        lg2BaseMeanCol, 
        '&y_axis=',
        logFCcol, 
        '&cat_id=',
        fgseaRes$cat_id
    )
    
    ###############################################################################
    ## Add scatterplot and heatmap urls                                          ##
    fgseaRes[["Heatmap_Link"]] <- paste0('https://biologic.crick.ac.uk/', Obio@parameterList$project_id, '/category-view/', fgseaRes$cat_id)
    
    ## Done                                                                      ##
    ###############################################################################

    
    ###########################################################################
    ## Save plot to file                                                     ##
    #library(openxlsx)
    
    FNTbase <- outFN
    FNT <- paste0(Obio@parameterList$reportTableDir, FNTbase)
    FNTrel <- paste0("report_tables/", FNTbase)
        
    
    
    sn <- gsub("GSEA_", "", substr( paste0(tag, "_GSEA"), 1, 27))
    sn <- paste0(i, "_", sn)
    openxlsx::addWorksheet(wb, sn)
    openxlsx::freezePane(wb, sn ,  firstActiveRow = 2)
    
    hs1 <- openxlsx::createStyle(
        fontColour = "#ffffff",
        fgFill = "#000000", 
        halign = "CENTER", 
        textDecoration = "Bold"
    )
    
    openxlsx::writeData(wb, sheet=sn, fgseaRes, startRow = 1, startCol = 1, headerStyle = hs1)
  
    ##                                                                       ##
    ###########################################################################        

    
    if (i==1){
        dfRes <- fgseaRes
    } else {
        dfRes <- rbind(
            dfRes, 
            fgseaRes
        )
    }
    print(paste0(tag, " done."))
}


 openxlsx::saveWorkbook(
        wb, 
        FNT,
        overwrite = TRUE
    )




if (length(plotList) > 2){
    tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
    tabVar <- ".tabset .tabset-fade .tabset-pills"
}

Top up-/down-regulated GSEA categories

GSEA_logFC_Colon_T_vs_Colon_N

Figure 37: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_Colon_T_vs_Colon_N. Download a pdf of this figure here.Figure 37: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_Colon_T_vs_Colon_N. Download a pdf of this figure here.

GSEA_logFC_Kidney_T_vs_Kidney_N

Figure 38: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_Kidney_T_vs_Kidney_N. Download a pdf of this figure here.Figure 38: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_Kidney_T_vs_Kidney_N. Download a pdf of this figure here.

GSEA_logFC_Liver_T_vs_Liver_N

Figure 39: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_Liver_T_vs_Liver_N. Download a pdf of this figure here.Figure 39: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_Liver_T_vs_Liver_N. Download a pdf of this figure here.

GSEA_logFC_Lung_T_vs_Lung_N

Figure 40: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_Lung_T_vs_Lung_N. Download a pdf of this figure here.Figure 40: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_Lung_T_vs_Lung_N. Download a pdf of this figure here.

GSEA_logFC_Skin_T_vs_Skin_N

Figure 41: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_Skin_T_vs_Skin_N. Download a pdf of this figure here.Figure 41: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_Skin_T_vs_Skin_N. Download a pdf of this figure here.

GSEA_logFC_panTumor_vs_panNormal

Figure 42: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_panTumor_vs_panNormal. Download a pdf of this figure here.Figure 42: Top up- and downregulated GSEA gene categories for the log-FC comparison GSEA_logFC_panTumor_vs_panNormal. Download a pdf of this figure here.

###############################################################################
## Make Volcanoplot                                                          ##
makeVolcanoPlotGSEA <- function(
    dfPlotData,
    geneIDcolumn,
    topNgenes = 5,
    dotsize = 1,
    legendDotSize = 5,
    sdCutOff = 1
){
    headline <- names(dfPlotData)[grep("GSEA", names(dfPlotData))]
    #headline <- unlist(strsplit(headline, "logFC_"))[2]
    
    #names(dfPlotData) <- gsub("contrast_[0-9]{1,2}_", "", names(dfPlotData))
    
    logFCcolName <- names(dfPlotData)[grep("NES", names(dfPlotData))]
    lg10pColName <- names(dfPlotData)[grep("lg10p", names(dfPlotData))]
    padjColName <- names(dfPlotData)[grep("^padj$", names(dfPlotData))]
    
    ## Now let's get these data columns out of the main data table.
    dfPlotData <- dfPlotData[dfPlotData[,logFCcolName] != 0, ]
    dfPlotData[,geneIDcolumn] <- as.character(dfPlotData[,geneIDcolumn] )
    
    ## Determine logFC cut-off for the Volcano Plot ##
    lgFCsel <- sdCutOff * sd(dfPlotData[, logFCcolName])
    
    dfPlotData[["color"]] <- "NS"
    dfPlotData[dfPlotData[, logFCcolName] > lgFCsel & dfPlotData[, padjColName] < 0.05, "color"] <-  "Up"
    
    dfPlotData[dfPlotData[, logFCcolName] < -1*lgFCsel & dfPlotData[, padjColName] < 0.05, "color"] <-  "Down"
    
    ## Re-order dfPlotData for better results
    
    ## Let's have a look at the color vector
    
    
    colorVec <- c("blue", "red","black")
    
    names(colorVec) <- c("Down", "Up", "NS")
    
    
    ## And here is the resulting color vector
    colorVec <- colorVec[names(colorVec) %in% dfPlotData$color]
    
    dfPlotData$color <- factor(dfPlotData$color, levels = names(colorVec))
    dfPlotData <- dfPlotData[order(dfPlotData$color, decreasing = F), ]
    
    ## And here is the resulting color vector
    colorVec <- colorVec[names(colorVec) %in% dfPlotData$color]
    
    dfPlotData$color <- factor(dfPlotData$color, levels = names(colorVec))
    dfPlotData <- dfPlotData[order(dfPlotData$color, decreasing = F), ]
    
    ## Now let's also add a label for the 10 most significantly up- and down-regulated genes.This number can be changed in the variable Nsel. Here we use the R package ggrepel.
    
    library(ggrepel)
    
    ## Let's order the data frame by log-fold change
    dfPlotData <- dfPlotData[order(dfPlotData[,logFCcolName], decreasing = T), ]
    topGenes <- as.vector(dfPlotData[1:topNgenes,geneIDcolumn])
    
    dfPlotData <- dfPlotData[order(dfPlotData[,logFCcolName], decreasing = F), ]
    bottomGenes <- as.vector(dfPlotData[1:topNgenes,geneIDcolumn])
    
    dfPlotData[["label"]] <- ""
    dfPlotData[dfPlotData[,geneIDcolumn] %in% c(topGenes, bottomGenes), "label"] <- dfPlotData[dfPlotData[,geneIDcolumn] %in% c(topGenes, bottomGenes), geneIDcolumn]
    
xMaxVal <- max(abs(dfPlotData[,logFCcolName]))
    
pVolcano <- ggplot(
        data = dfPlotData, 
        aes_string(x=logFCcolName, y=lg10pColName, color = "color",label = "label")
    ) + geom_hline(yintercept = 0, color = "black", size=0.5
    ) + geom_hline(yintercept = -1*log10(0.05), color = "grey", size=0.5, linetype = 2
    ) + geom_vline(xintercept = 0, color = "black", size=0.5
    ) + geom_vline(xintercept = c(-1*lgFCsel,lgFCsel), color = "grey", size=0.5, linetype = 2      ) + geom_point( shape=16, size = dotsize
    ) + scale_colour_manual(name = "Variability" ,values = colorVec
    
    ) + theme_bw(
    )  +  theme(
        axis.text.y   = element_text(size=8),
        axis.text.x   = element_text(size=8),
        axis.title.y  = element_text(size=8),
        axis.title.x  = element_text(size=8),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        plot.title = element_text(hjust = 0.5, size = 12)
    ) + xlim(-1*xMaxVal,xMaxVal
    ) + ggtitle(paste0("GSEA NES Volcano Plot ", contrastVec[i]) 
    ) + xlab(gsub("_", " ", logFCcolName)
    ) + ylab(gsub("_", " ", lg10pColName)            
    ) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
    ) 

    plotVolcanoWithLabels <- pVolcano + geom_text_repel(size = 3)

return(plotVolcanoWithLabels)
}
## Done Volcanoplot                                                          ##
###############################################################################


chnkVec <- as.vector(NULL, mode = "character")
plotList <- list()

tagVec <-  unique(dfRes$GSEA)

for (i in 1:length(tagVec)){
    tag <- paste0("V_",tagVec[i])
    dfPlot <- dfRes[dfRes$GSEA == tagVec[i], c("GSEA", "cat_id", "cat_name", "NES", "padj")]
    dfPlot <- na.omit(dfPlot)
    #dfPlot[["label"]] <- ""
    dfPlot <- dfPlot[order(dfPlot$NES, decreasing = T), ]
    minP <- min(dfPlot$padj[dfPlot$padj != 0])
    dfPlot[["lg10padj"]] <- 0
    dfPlot[dfPlot$padj != 0, "lg10padj"] <- -1*log10(dfPlot$padj)
    
    ## Function is defined in module C9.
    plotList[[tag]] <- makeVolcanoPlotGSEA(
        dfPlotData = dfPlot,
        geneIDcolumn = "cat_name",
        topNgenes = 5,
        dotsize = 1,
        legendDotSize = 5,
        sdCutOff = 1
    )
    
    ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0(tag, ".volcano.plot", VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotList[[tag]])
    dev.off()
    ##                                                                       ##
    ###########################################################################
    
    # selLg2BM<- selVec[grep("lg2BaseMean_", names(dfPlotData))]
    # selLogFC <- selVec[grep("_logFC_", names(dfPlotData))]
    # 
    #  
    # xAxis <- selLg2BM[grep(contrastVec[i], selLg2BM)]
    # yAxis <- selLogFC[grep(contrastVec[i], selLogFC)]
    # 
    # link1 <- paste0('<a href="https://biologic.crick.ac.uk/',Obio@parameterList$project_id,'/scatterplot?x_axis=',xAxis,'&y_axis=',yAxis,'&cat_id=ag_lab_categories__10" target="_blank">here</a>.')
    
    
    figCap <- paste0(
        '**Figure ',
        figureCount,
        'A:** GSEA NES Volcano Plot ',tag,'. This may plot might help to determine the overall significance of GSEA category enrichments in this experiment.',
        'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
        #'An interactive version of this plot can be found ', link1
    )
 
    figureCount <- figureCount + 1
   
    NewChnk <- paste0(
            "### GSEAV-Plot ",tag,
            "\n```{r ",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
            "\n",
            "\n print(plotList[['",tag,"']])",
            "\n cat(  '\n')",
            "\n\n\n```\n"   
    )

    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
      
}

if (length(plotList) > 2){
    tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
    tabVar <- ".tabset .tabset-fade .tabset-pills"
}

Diagnotstic GSEA Volcano Plots

GSEAV-Plot V_GSEA_logFC_Colon_T_vs_Colon_N

Figure 43A: GSEA NES Volcano Plot V_GSEA_logFC_Colon_T_vs_Colon_N. This may plot might help to determine the overall significance of GSEA category enrichments in this experiment.Download a pdf of this figure here.

GSEAV-Plot V_GSEA_logFC_Kidney_T_vs_Kidney_N

Figure 44A: GSEA NES Volcano Plot V_GSEA_logFC_Kidney_T_vs_Kidney_N. This may plot might help to determine the overall significance of GSEA category enrichments in this experiment.Download a pdf of this figure here.

GSEAV-Plot V_GSEA_logFC_Liver_T_vs_Liver_N

Figure 45A: GSEA NES Volcano Plot V_GSEA_logFC_Liver_T_vs_Liver_N. This may plot might help to determine the overall significance of GSEA category enrichments in this experiment.Download a pdf of this figure here.

GSEAV-Plot V_GSEA_logFC_Lung_T_vs_Lung_N

Figure 46A: GSEA NES Volcano Plot V_GSEA_logFC_Lung_T_vs_Lung_N. This may plot might help to determine the overall significance of GSEA category enrichments in this experiment.Download a pdf of this figure here.

GSEAV-Plot V_GSEA_logFC_Skin_T_vs_Skin_N

Figure 47A: GSEA NES Volcano Plot V_GSEA_logFC_Skin_T_vs_Skin_N. This may plot might help to determine the overall significance of GSEA category enrichments in this experiment.Download a pdf of this figure here.

GSEAV-Plot V_GSEA_logFC_panTumor_vs_panNormal

Figure 48A: GSEA NES Volcano Plot V_GSEA_logFC_panTumor_vs_panNormal. This may plot might help to determine the overall significance of GSEA category enrichments in this experiment.Download a pdf of this figure here.

###############################################################################
## Retrieve GSEA Table                                                       ##

dfGdat <- dfRes




###############################################################################
## Plot top-scoring categories                                               ##

## Select top 10 categories from each ##


dfGdatS <- dfGdat
## Write table as Excel into outputs ##

    

tableCap <- paste0(
'**GSEA Result Table:** Find the GSEA normalized enrichment score (NES) and the enrichment p-value in the above table. Plot entries mean that for this category and comparison a GSEA plot is readily available for download. Download the full GSEA result table as Excel file <a href = "',FNTrel,'" target="_blank">here</a>'
)

dfGdat <- dfGdatS

GSEA Result Table

chnkVec <- as.vector(NULL, mode = "character")

## Add sample group color ##

dfDataTable <- dfGdat
dfDataTable <- dfDataTable[dfDataTable$padj < 0.25,]

dfDataTable[["Volcanoplot_Link"]] <- paste0(
    '<a href="',
    dfDataTable[["Volcanoplot_Link"]],
    '" target="_blank">Cat Volcano Plot Link</a>'
)

dfDataTable[["MAplot_Link"]] <- paste0(
    '<a href="',
    dfDataTable[["MAplot_Link"]],
    '" target="_blank">Cat_MA Plot Link</a>'
)

dfDataTable[["Heatmap_Link"]] <- paste0(
    '<a href="',
    dfDataTable[["Heatmap_Link"]],
    '" target="_blank">Cat Heatmap Link</a>'
)


dfDataTable$cat_name <- gsub("_", " ", dfDataTable$cat_name)
dfDataTable$NES <- round(dfDataTable$NES, 3)
dfDataTable$padj <- scales::scientific(dfDataTable$padj, digits = 3)


GSEAcol <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(8, "Pastel1"))(length(unique(dfDataTable$GSEA)))

dfCol <- data.frame(GSEA=unique(dfDataTable$GSEA), contrastCol=GSEAcol)

dfDataTable <- merge(
    dfDataTable, 
    dfCol, 
    by.x = "GSEA",
    by.y = "GSEA"
)

dfDataTable$GSEA <- paste0(
        '<p style="background-color:',dfDataTable$contrastCol,';text-align:center">',dfDataTable$GSEA,'</p>'
    ) 

selVec <- c(
    "GSEA",
    "cat_name",         
    "NES",              
    "padj",            
    "Volcanoplot_Link", 
    "MAplot_Link",      
    "Heatmap_Link"  
)

selVec <- selVec[selVec %in% names(dfDataTable)]
dfDataTable <- unique(dfDataTable[,selVec])

dfDataTable <- dfDataTable[order(dfDataTable$NES, decreasing=F), ]

DT::datatable(
    dfDataTable,
    colnames = gsub("_", " ", names(dfDataTable)),
    rownames = FALSE,
    escape = FALSE,
    options = list(
        initComplete = htmlwidgets::JS(
            "function(settings, json) {",
            "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
            "}"
        ),
    order = list( list(3, 'desc'), list(4, 'asc'))
    )
) 

GSEA Result Table: Find the GSEA normalized enrichment score (NES) and the enrichment p-value in the above table. Plot entries mean that for this category and comparison a GSEA plot is readily available for download. Download the full GSEA result table as Excel file here

chnkVec <- as.vector(NULL, mode = "character")
plotList <- list()
lrtVec <- names(Obio@DEseq2LRTtable)[grep("LRT_",names(Obio@DEseq2LRTtable))]
lrtVec <- names(Obio@DEseq2LRTtable)
lrtVec <- lrtVec[grep("lg10p", lrtVec)]


if (length(lrtVec) > 0){
for (i in 1:length(lrtVec)){
    lrtVec[i] <- unlist(strsplit(lrtVec[i], "lg10p"))[2]
    tag <- lrtVec[i]
    selVec <- c(
      Obio@parameterList$geneIDcolumn,
      paste0("contrast_L_lg2BaseMean", lrtVec[i]),
      paste0("contrast_L_lg10p", lrtVec[i])
    )
      
    dfTemp <- unique(dfMainData[,selVec])  
    names(dfTemp)[2] <- "X"
    names(dfTemp)[3] <- "Y"
    
    dfTemp <- dfTemp[!(dfTemp[,2] ==0),]
    dfTemp <- dfTemp[!(dfTemp[,3] ==0),]
    
    dfTemp <- dfTemp[order(dfTemp$Y, decreasing=T),]
    
    dfTemp[["label"]] <- ""
    dfTemp[1:10,"label"] <- dfTemp[1:10,geneIDcolumn]
    
    ###########################################################################
    ## Make plot                                                             ##
    dsize <- 1
    alpha <- I(0.5)
    shape <- 21
    legendDotSize <- 5
    
    plotList[[tag]] <- ggplot(
        data = dfTemp,
        aes(x=X, y=Y, label = label)
    #) + geom_vline(xintercept = 0, color = "grey", size=0.5
    ) + geom_hline(yintercept = c(2), color = "red", size=0.5,linetype = 2
    ) + geom_hline(yintercept = 0, color = "grey", size=0.5
    ) + geom_point(
        size=dsize,
        shape = shape,
        alpha = alpha,
        fill = "grey"
    ) + labs(
        title = paste0("LRT padj vs. BaseMean ", lrtVec[i]), 
        x = paste0("Base Mean/Intensity", lrtVec[i],")"),
        y = paste0("-log10(LRT-pval ", lrtVec[i],")") 
    ) + theme_bw(
    ) +  theme(
        axis.text.y   = element_text(size=8),
        axis.text.x   = element_text(size=8),
        axis.title.y  = element_text(size=12),
        axis.title.x  = element_text(size=12),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        plot.title = element_text(hjust = 0.5, size = 12)
        #) + scale_fill_manual(values=c("#999999", "#E69F00")
    ) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
    ) 
         
    
    ## And now let's add the labels:
    plotList[[tag]] <- plotList[[tag]] + ggrepel::geom_text_repel(size = 3)
    ## Done making plot 
    ###########################################################################
  
    ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0(lrtVec[i], ".LRTplot", VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotList[[tag]])
    dev.off()
    ##                                                                       ##
    ###########################################################################
    
   link <- paste0("https://biologic.crick.ac.uk/",Obio@parameterList$project_id,"/scatterplot?x_axis=",selVec[2],"&y_axis=",selVec[3],"&cat_id=ag_lab_categories__10")
    
    figCap <- paste0(
        '**Figure ',
        figureCount,
        '** -log10 p-value of the likelihood ratio rest vs. Base Mean/Intensity plot. Donwolad a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>',
        'An interactive version of this figure can be found <a href="',link,'" target="_blank">here</a>. '
    )
 
    figureCount <- figureCount + 1
    
    NewChnk <- paste0(
        paste0("## ",tag," \n"),
            "\n```{r, results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
            "\n",
            "\n print(plotList[['",tag,"']])",
            "\n cat(  '\n')",
            "\n\n\n```\n"   
    )

    chnkVec <- c(
        chnkVec,
        NewChnk
    )
}
}

if (length(lrtVec) > 0){
    if (length(plotList) > 2){
        tabVar <- ".tabset .tabset-fade .tabset-dropdown"
    } else {
        tabVar <- ".tabset .tabset-fade .tabset-pills"
    }
    
    sectionDisplay <- paste0("# Likelyhood-ratio Test Results (LRT) {",tabVar, "}")
    
} else {
    sectionDisplay <- ""
}

Likelyhood-ratio Test Results (LRT)

_LRT_Organ_origin

Figure 49 -log10 p-value of the likelihood ratio rest vs. Base Mean/Intensity plot. Donwolad a pdf of this figure hereAn interactive version of this figure can be found here.

_LRT_Tissue

Figure 50 -log10 p-value of the likelihood ratio rest vs. Base Mean/Intensity plot. Donwolad a pdf of this figure hereAn interactive version of this figure can be found here.

Project Summary

cat(paste0('## Check Positive Controls - Individual Genes','\n','\n',
'In order to get an overview over your latest sequencing results, you might want to look for the performance of individual genes that may serve as a positive control. You can do this by entering an official gene name into the search box in the  [GeneView section](https://biologic.crick.ac.uk/',Obio@parameterList$project_id,'/gene-view). Genes that were detected in this experiment will be suggested to you after starting to type. If a gene is not suggested, it was not detected in this experiment. The gene result on display will give you information on the amount of reads detected for the gene in question (TPM value plot. TPM values give you read-counts, normalized for the gene length and the library size - see slideshow for a detailed definition).','\n','\n',
           
'## Check Positive Controls - Gene Categories
Next you may wish to view your latest dataset through the lens of a gene category that captures all genes relevant to the process you are investigating. A number of gene categories represented in your experiment can be found in the CategoryView section, lower panel. Reference categories are organized by category class and for most reference category a weblink is given to inform you about the origin of that gene category dataset (Category Description column). Click to the category name in order to view the performance of the genes in that category in the context of your experiment. You will be presented by default with a heatmap, but you may change this to a 2D scatterplot in which the category genes are highlighted by using the pull-down menu given underneath the heatmap depiction. In addition, a table is given informing you about the log-fold changes recorded for genes in this category. You may click on individual tiles in the heatmap to be taken to the individual results for the gene.','\n','\n',
           
'It might make sense to review your data in the context of results your lab has obtained in the past or in the context of published data. In order to that bioinformatics will add gene categories to either your lab categories selection or to the selection "this experiment" in CategoryView, lower table.','\n','\n',
           
'## Result Table Download',
'\n','\n',

'<a href="report_tables/"',Obio@parameterList$project_id,'_GSEA.xlsx"  target="_blank">Download Result Table</a>','\n','\n',
'<a href="report_tables/"',Obio@parameterList$project_id,'.result.table.xlsx"  target="_blank">Download Metacore Input File</a>','\n','\n',


'## Bioinformatics Method Summary ','\n',
'Sequencing was performed on an ',Obio@parameterList$machine,' machine. The "Trim Galore!" utility version 0.4.2 was used to remove sequencing adaptors and to quality trim individual reads with the q-parameter set to 20 (1). Then sequencing reads were aligned to the mouse genome and transcriptome (Ensembl ', Obio@parameterList$genome, Obio@parameterList$release,') using RSEM version 1.3.0 (2) in conjunction with the STAR aligner version 2.5.2 (3). Sequencing quality of individual samples was assessed using FASTQC version 0.11.5 (4) and RNA-SeQC version 1.1.8 (5). Differential gene expression was determined using the R-bioconductor package DESeq2 version 1.14.1(6,7). Gene set enrichment analysis (GSEA) was conducted as described in Subramanian et al (8).','\n','\n',
           
           
           
'REF 1: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ (retrieved 03-05-2017)','\n','\n',
           
'REF 2: Bo Li and Colin N Dewey (2011). RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome. BMC Bioinformatics 12:323','\n','\n',
           
'REF 3 : Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M and Gineras TR. (2012) STAR: ultrafast universal RNASEQ aligner. Bioinformatics. 29. 15-21','\n','\n',
           
'REF 4: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ (retrieved 03-05-2017)','\n','\n',
           
'REF 5: DeLuca et al (2012). RNA-SeQC: RNA-seq metrics for quality control and process optimization. Bioinformatics (28) 1530-1532','\n','\n',
           
'REF 6: Love MI, Huber W and Anders S (2014). Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology, 15, pp. 550.','\n','\n',
           
'REF 7: R Development Core Team (2008). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org.','\n','\n',
           
'REF 8: Subramanian et al.(2005), Gene set enrichment analysi: A knowledge-based approach for interpreting genome-wide expression profiles. PNAS (43) 15545-15550.','\n','\n'
))

Check Positive Controls - Individual Genes

In order to get an overview over your latest sequencing results, you might want to look for the performance of individual genes that may serve as a positive control. You can do this by entering an official gene name into the search box in the GeneView section. Genes that were detected in this experiment will be suggested to you after starting to type. If a gene is not suggested, it was not detected in this experiment. The gene result on display will give you information on the amount of reads detected for the gene in question (TPM value plot. TPM values give you read-counts, normalized for the gene length and the library size - see slideshow for a detailed definition).

Check Positive Controls - Gene Categories

Next you may wish to view your latest dataset through the lens of a gene category that captures all genes relevant to the process you are investigating. A number of gene categories represented in your experiment can be found in the CategoryView section, lower panel. Reference categories are organized by category class and for most reference category a weblink is given to inform you about the origin of that gene category dataset (Category Description column). Click to the category name in order to view the performance of the genes in that category in the context of your experiment. You will be presented by default with a heatmap, but you may change this to a 2D scatterplot in which the category genes are highlighted by using the pull-down menu given underneath the heatmap depiction. In addition, a table is given informing you about the log-fold changes recorded for genes in this category. You may click on individual tiles in the heatmap to be taken to the individual results for the gene.

It might make sense to review your data in the context of results your lab has obtained in the past or in the context of published data. In order to that bioinformatics will add gene categories to either your lab categories selection or to the selection “this experiment” in CategoryView, lower table.

Result Table Download

<a href=“report_tables/”bulkliverdemo_GSEA.xlsx" target="_blank">Download Result Table

<a href=“report_tables/”bulkliverdemo.result.table.xlsx" target="_blank">Download Metacore Input File

Bioinformatics Method Summary

Sequencing was performed on an machine. The “Trim Galore!” utility version 0.4.2 was used to remove sequencing adaptors and to quality trim individual reads with the q-parameter set to 20 (1). Then sequencing reads were aligned to the mouse genome and transcriptome (Ensembl release-105) using RSEM version 1.3.0 (2) in conjunction with the STAR aligner version 2.5.2 (3). Sequencing quality of individual samples was assessed using FASTQC version 0.11.5 (4) and RNA-SeQC version 1.1.8 (5). Differential gene expression was determined using the R-bioconductor package DESeq2 version 1.14.1(6,7). Gene set enrichment analysis (GSEA) was conducted as described in Subramanian et al (8).

REF 1: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ (retrieved 03-05-2017)

REF 2: Bo Li and Colin N Dewey (2011). RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome. BMC Bioinformatics 12:323

REF 3 : Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M and Gineras TR. (2012) STAR: ultrafast universal RNASEQ aligner. Bioinformatics. 29. 15-21

REF 4: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ (retrieved 03-05-2017)

REF 5: DeLuca et al (2012). RNA-SeQC: RNA-seq metrics for quality control and process optimization. Bioinformatics (28) 1530-1532

REF 6: Love MI, Huber W and Anders S (2014). Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology, 15, pp. 550.

REF 7: R Development Core Team (2008). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org.

REF 8: Subramanian et al.(2005), Gene set enrichment analysi: A knowledge-based approach for interpreting genome-wide expression profiles. PNAS (43) 15545-15550.

Documentation

## [1] "Projectfolder: /nemo/stp/babs/working/boeings/Projects/boeings/stefan.boeing/621_demo_liverdemo_bulkRNAseq/scripts/bulkliverdemo/analyses/Main_Analysis"
## [1] "Project ID: bulkliverdemo"
## R version 4.5.0 (2025-04-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/local/lib/R/lib/libRblas.so 
## LAPACK: /usr/local/lib/R/lib/libRlapack.so;  LAPACK version 3.12.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices datasets  utils     methods  
## [8] base     
## 
## other attached packages:
##  [1] fgsea_1.34.2                 scales_1.4.0                
##  [3] clusterProfiler_4.16.0       knitr_1.50                  
##  [5] ggrepel_0.9.6                tidyr_1.3.1                 
##  [7] genefilter_1.90.0            lattice_0.22-6              
##  [9] RColorBrewer_1.1-3           gplots_3.2.0                
## [11] ggplot2_3.5.2                DESeq2_1.48.1               
## [13] SummarizedExperiment_1.38.1  Biobase_2.68.0              
## [15] MatrixGenerics_1.20.0        matrixStats_1.5.0           
## [17] GenomicRanges_1.60.0         GenomeInfoDb_1.44.1         
## [19] IRanges_2.42.0               S4Vectors_0.46.0            
## [21] BiocGenerics_0.54.0          generics_0.1.4              
## [23] RMySQL_0.11.1                DBI_1.2.3                   
## [25] biologicSeqTools2_0.0.0.9000 renv_1.0.2                  
## [27] remotes_2.5.0               
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.5.0           bitops_1.0-9            ggplotify_0.1.2        
##   [4] filelock_1.0.3          tibble_3.3.0            R.oo_1.27.1            
##   [7] XML_3.99-0.18           lifecycle_1.0.4         httr2_1.2.1            
##  [10] doParallel_1.0.17       vroom_1.6.5             MASS_7.3-65            
##  [13] crosstalk_1.2.1         magrittr_2.0.3          openxlsx_4.2.8         
##  [16] sass_0.4.10             rmarkdown_2.29          jquerylib_0.1.4        
##  [19] yaml_2.3.10             ggtangle_0.0.7          zip_2.3.3              
##  [22] cowplot_1.2.0           abind_1.4-8             purrr_1.1.0            
##  [25] R.utils_2.13.0          yulab.utils_0.2.0       rappdirs_0.3.3         
##  [28] circlize_0.4.16         GenomeInfoDbData_1.2.14 enrichplot_1.28.4      
##  [31] tidytree_0.4.6          annotate_1.86.1         codetools_0.2-20       
##  [34] DelayedArray_0.34.1     DOSE_4.2.0              DT_0.33                
##  [37] xml2_1.3.8              tidyselect_1.2.1        shape_1.4.6.1          
##  [40] aplot_0.2.8             UCSC.utils_1.4.0        farver_2.1.2           
##  [43] BiocFileCache_2.16.1    jsonlite_2.0.0          GetoptLong_1.0.5       
##  [46] survival_3.8-3          iterators_1.0.14        foreach_1.5.2          
##  [49] tools_4.5.0             progress_1.2.3          treeio_1.32.0          
##  [52] Rcpp_1.1.0              glue_1.8.0              SparseArray_1.8.1      
##  [55] xfun_0.52               qvalue_2.40.0           dplyr_1.1.4            
##  [58] withr_3.0.2             BiocManager_1.30.26     fastmap_1.2.0          
##  [61] caTools_1.18.3          digest_0.6.37           R6_2.6.1               
##  [64] gridGraphics_0.5-1      colorspace_2.1-1        GO.db_3.21.0           
##  [67] gtools_3.9.5            biomaRt_2.64.0          RSQLite_2.4.2          
##  [70] R.methodsS3_1.8.2       utf8_1.2.6              data.table_1.17.8      
##  [73] prettyunits_1.2.0       httr_1.4.7              htmlwidgets_1.6.4      
##  [76] S4Arrays_1.8.1          pkgconfig_2.0.3         gtable_0.3.6           
##  [79] blob_1.2.4              ComplexHeatmap_2.24.1   XVector_0.48.0         
##  [82] htmltools_0.5.8.1       clue_0.3-66             png_0.1-8              
##  [85] ggfun_0.2.0             ggdendro_0.2.0          tzdb_0.5.0             
##  [88] reshape2_1.4.4          rjson_0.2.23            nlme_3.1-168           
##  [91] curl_6.4.0              cachem_1.1.0            GlobalOptions_0.1.2    
##  [94] stringr_1.5.1           KernSmooth_2.23-26      parallel_4.5.0         
##  [97] AnnotationDbi_1.70.0    pillar_1.11.0           grid_4.5.0             
## [100] vctrs_0.6.5             dbplyr_2.5.0            xtable_1.8-4           
## [103] cluster_2.1.8.1         evaluate_1.0.4          readr_2.1.5            
## [106] cli_3.6.5               locfit_1.5-9.12         compiler_4.5.0         
## [109] rlang_1.1.6             crayon_1.5.3            labeling_0.4.3         
## [112] plyr_1.8.9              fs_1.6.6                stringi_1.8.7          
## [115] BiocParallel_1.42.1     Biostrings_2.76.0       lazyeval_0.2.2         
## [118] GOSemSim_2.34.0         Matrix_1.7-3            hms_1.1.3              
## [121] patchwork_1.3.1         bit64_4.6.0-1           KEGGREST_1.48.1        
## [124] igraph_2.1.4            memoise_2.0.1           bslib_0.9.0            
## [127] ggtree_3.16.3           fastmatch_1.1-6         bit_4.6.0              
## [130] ape_5.8-1               gson_0.1.0

  1. The Francis Crick Institute, ↩︎